gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20 02111-1307, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "errors.h"
 128 #include "ggc.h"
 129 #include "tree.h"
 130 #include "target.h"
 131
 132 #include "rtl.h"
 133 #include "basic-block.h"
 134 #include "diagnostic.h"
 135 #include "tree-flow.h"
 136 #include "tree-dump.h"
 137 #include "timevar.h"
 138 #include "cfgloop.h"
 139 #include "cfglayout.h"
 140 #include "expr.h"
 141 #include "optabs.h"
 142 #include "toplev.h"
 143 #include "tree-chrec.h"
 144 #include "tree-data-ref.h"
 145 #include "tree-scalar-evolution.h"
 146 #include "tree-vectorizer.h"
 147 #include "tree-pass.h"
 148 #include "langhooks.h"
 149
 150
 151 /*************************************************************************
 152   Simple Loop Peeling Utilities
 153  *************************************************************************/
 154
 155 /* Entry point for peeling of simple loops.
 156    Peel the first/last iterations of a loop.
 157    It can be used outside of the vectorizer for loops that are simple enough
 158    (see function documentation).  In the vectorizer it is used to peel the
 159    last few iterations when the loop bound is unknown or does not evenly
 160    divide by the vectorization factor, and to peel the first few iterations
 161    to force the alignment of data references in the loop.  */
 162 struct loop *slpeel_tree_peel_loop_to_edge
 163   (struct loop *, struct loops *, edge, tree, tree, bool);
 164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 165   (struct loop *, struct loops *, edge);
 166 static void slpeel_update_phis_for_duplicate_loop
 167   (struct loop *, struct loop *, bool after);
 168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
 169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
 170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
 172 static void allocate_new_names (bitmap);
 173 static void rename_use_op (use_operand_p);
 174 static void rename_def_op (def_operand_p, tree);
 175 static void rename_variables_in_bb (basic_block);
 176 static void free_new_names (bitmap);
 177 static void rename_variables_in_loop (struct loop *);
 178 #ifdef ENABLE_CHECKING
 179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
 180 #endif
 181
 182
 183 /*************************************************************************
 184   Vectorization Utilities.
 185  *************************************************************************/
 186
 187 /* Main analysis functions.  */
 188 static loop_vec_info vect_analyze_loop (struct loop *);
 189 static loop_vec_info vect_analyze_loop_form (struct loop *);
 190 static bool vect_analyze_data_refs (loop_vec_info);
 191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
 192 static bool vect_analyze_scalar_cycles (loop_vec_info);
 193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
 194 static bool vect_analyze_data_refs_alignment (loop_vec_info);
 195 static bool vect_compute_data_refs_alignment (loop_vec_info);
 196 static bool vect_analyze_operations (loop_vec_info);
 197
 198 /* Main code transformation functions.  */
 199 static void vect_transform_loop (loop_vec_info, struct loops *);
 200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
 201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
 202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
 203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
 204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
 205 static enum dr_alignment_support vect_supportable_dr_alignment
 206   (struct data_reference *);
 207 static void vect_align_data_ref (tree);
 208 static void vect_enhance_data_refs_alignment (loop_vec_info);
 209
 210 /* Utility functions for the analyses.  */
 211 static bool vect_is_simple_use (tree , struct loop *, tree *);
 212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
 213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
 214 static void vect_mark_relevant (varray_type, tree);
 215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
 216 static tree vect_get_loop_niters (struct loop *, tree *);
 217 static bool vect_compute_data_ref_alignment
 218   (struct data_reference *, loop_vec_info);
 219 static bool vect_analyze_data_ref_access (struct data_reference *);
 220 static bool vect_get_first_index (tree, tree *);
 221 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
 222 static struct data_reference * vect_analyze_pointer_ref_access
 223   (tree, tree, bool);
 224 static bool vect_can_advance_ivs_p (struct loop *);
 225 static tree vect_get_base_and_bit_offset
 226   (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
 227 static struct data_reference * vect_analyze_pointer_ref_access
 228   (tree, tree, bool);
 229 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
 230 static tree vect_compute_array_ref_alignment
 231   (struct data_reference *, loop_vec_info, tree, tree *);
 232 static tree vect_get_ptr_offset (tree, tree, tree *);
 233 static tree vect_get_symbl_and_dr
 234   (tree, tree, bool, loop_vec_info, struct data_reference **);
 235
 236 /* Utility functions for the code transformation.  */
 237 static tree vect_create_destination_var (tree, tree);
 238 static tree vect_create_data_ref_ptr
 239   (tree, block_stmt_iterator *, tree, tree *, bool);
 240 static tree vect_create_index_for_vector_ref
 241   (struct loop *, block_stmt_iterator *);
 242 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
 243 static tree get_vectype_for_scalar_type (tree);
 244 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
 245 static tree vect_get_vec_def_for_operand (tree, tree);
 246 static tree vect_init_vector (tree, tree);
 247 static void vect_finish_stmt_generation
 248   (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
 249
 250 /* Utility function dealing with loop peeling (not peeling itself).  */
 251 static void vect_generate_tmps_on_preheader
 252   (loop_vec_info, tree *, tree *, tree *);
 253 static tree vect_build_loop_niters (loop_vec_info);
 254 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
 255 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
 256 static void vect_update_inits_of_dr
 257   (struct data_reference *, struct loop *, tree niters);
 258 static void vect_update_inits_of_drs (loop_vec_info, tree);
 259 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
 260 static void vect_do_peeling_for_loop_bound
 261   (loop_vec_info, tree *, struct loops *);
 262
 263 /* Utilities for creation and deletion of vec_info structs.  */
 264 loop_vec_info new_loop_vec_info (struct loop *loop);
 265 void destroy_loop_vec_info (loop_vec_info);
 266 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
 267
 268 static bool vect_debug_stats (struct loop *loop);
 269 static bool vect_debug_details (struct loop *loop);
 270
 271 \f
 272 /*************************************************************************
 273   Simple Loop Peeling Utilities
 274
 275   Utilities to support loop peeling for vectorization purposes.
 276  *************************************************************************/
 277
 278
 279 /* For each definition in DEFINITIONS this function allocates
 280    new ssa name.  */
 281
 282 static void
 283 allocate_new_names (bitmap definitions)
 284 {
 285   unsigned ver;
 286   bitmap_iterator bi;
 287
 288   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 289     {
 290       tree def = ssa_name (ver);
 291       tree *new_name_ptr = xmalloc (sizeof (tree));
 292
 293       bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
 294
 295       *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
 296       SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
 297
 298       SSA_NAME_AUX (def) = new_name_ptr;
 299     }
 300 }
 301
 302
 303 /* Renames the use *OP_P.  */
 304
 305 static void
 306 rename_use_op (use_operand_p op_p)
 307 {
 308   tree *new_name_ptr;
 309
 310   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 311     return;
 312
 313   new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
 314
 315   /* Something defined outside of the loop.  */
 316   if (!new_name_ptr)
 317     return;
 318
 319   /* An ordinary ssa name defined in the loop.  */
 320
 321   SET_USE (op_p, *new_name_ptr);
 322 }
 323
 324
 325 /* Renames the def *OP_P in statement STMT.  */
 326
 327 static void
 328 rename_def_op (def_operand_p op_p, tree stmt)
 329 {
 330   tree *new_name_ptr;
 331
 332   if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
 333     return;
 334
 335   new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
 336
 337   /* Something defined outside of the loop.  */
 338   if (!new_name_ptr)
 339     return;
 340
 341   /* An ordinary ssa name defined in the loop.  */
 342
 343   SET_DEF (op_p, *new_name_ptr);
 344   SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
 345 }
 346
 347
 348 /* Renames the variables in basic block BB.  */
 349
 350 static void
 351 rename_variables_in_bb (basic_block bb)
 352 {
 353   tree phi;
 354   block_stmt_iterator bsi;
 355   tree stmt;
 356   stmt_ann_t ann;
 357   use_optype uses;
 358   vuse_optype vuses;
 359   def_optype defs;
 360   v_may_def_optype v_may_defs;
 361   v_must_def_optype v_must_defs;
 362   unsigned i;
 363   edge e;
 364   edge_iterator ei;
 365   struct loop *loop = bb->loop_father;
 366
 367   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
 368     rename_def_op (PHI_RESULT_PTR (phi), phi);
 369
 370   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 371     {
 372       stmt = bsi_stmt (bsi);
 373       get_stmt_operands (stmt);
 374       ann = stmt_ann (stmt);
 375
 376       uses = USE_OPS (ann);
 377       for (i = 0; i < NUM_USES (uses); i++)
 378         rename_use_op (USE_OP_PTR (uses, i));
 379
 380       defs = DEF_OPS (ann);
 381       for (i = 0; i < NUM_DEFS (defs); i++)
 382         rename_def_op (DEF_OP_PTR (defs, i), stmt);
 383
 384       vuses = VUSE_OPS (ann);
 385       for (i = 0; i < NUM_VUSES (vuses); i++)
 386         rename_use_op (VUSE_OP_PTR (vuses, i));
 387
 388       v_may_defs = V_MAY_DEF_OPS (ann);
 389       for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
 390         {
 391           rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
 392           rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
 393         }
 394
 395       v_must_defs = V_MUST_DEF_OPS (ann);
 396       for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
 397         {
 398           rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
 399           rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
 400         }
 401     }
 402
 403   FOR_EACH_EDGE (e, ei, bb->succs)
 404     {
 405       if (!flow_bb_inside_loop_p (loop, e->dest))
 406         continue;
 407       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 408         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 409     }
 410 }
 411
 412
 413 /* Releases the structures holding the new ssa names.  */
 414
 415 static void
 416 free_new_names (bitmap definitions)
 417 {
 418   unsigned ver;
 419   bitmap_iterator bi;
 420
 421   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 422     {
 423       tree def = ssa_name (ver);
 424
 425       if (SSA_NAME_AUX (def))
 426         {
 427           free (SSA_NAME_AUX (def));
 428           SSA_NAME_AUX (def) = NULL;
 429         }
 430     }
 431 }
 432
 433
 434 /* Renames variables in new generated LOOP.  */
 435
 436 static void
 437 rename_variables_in_loop (struct loop *loop)
 438 {
 439   unsigned i;
 440   basic_block *bbs;
 441
 442   bbs = get_loop_body (loop);
 443
 444   for (i = 0; i < loop->num_nodes; i++)
 445     rename_variables_in_bb (bbs[i]);
 446
 447   free (bbs);
 448 }
 449
 450
 451 /* Update the PHI nodes of NEW_LOOP.
 452
 453    NEW_LOOP is a duplicate of ORIG_LOOP.
 454    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 455    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 456    executes before it.  */
 457
 458 static void
 459 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 460                                        struct loop *new_loop, bool after)
 461 {
 462   tree *new_name_ptr, new_ssa_name;
 463   tree phi_new, phi_orig;
 464   tree def;
 465   edge orig_loop_latch = loop_latch_edge (orig_loop);
 466   edge orig_entry_e = loop_preheader_edge (orig_loop);
 467   edge new_loop_exit_e = new_loop->exit_edges[0];
 468   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 469   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 470
 471   /*
 472      step 1. For each loop-header-phi:
 473              Add the first phi argument for the phi in NEW_LOOP
 474             (the one associated with the entry of NEW_LOOP)
 475
 476      step 2. For each loop-header-phi:
 477              Add the second phi argument for the phi in NEW_LOOP
 478             (the one associated with the latch of NEW_LOOP)
 479
 480      step 3. Update the phis in the successor block of NEW_LOOP.
 481
 482         case 1: NEW_LOOP was placed before ORIG_LOOP:
 483                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 484                 Updating the phis in the successor block can therefore be done
 485                 along with the scanning of the loop header phis, because the
 486                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 487                 phi nodes, organized in the same order.
 488
 489         case 2: NEW_LOOP was placed after ORIG_LOOP:
 490                 The successor block of NEW_LOOP is the original exit block of
 491                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 492                 We postpone updating these phis to a later stage (when
 493                 loop guards are added).
 494    */
 495
 496
 497   /* Scan the phis in the headers of the old and new loops
 498      (they are organized in exactly the same order).  */
 499
 500   for (phi_new = phi_nodes (new_loop->header),
 501        phi_orig = phi_nodes (orig_loop->header);
 502        phi_new && phi_orig;
 503        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 504     {
 505       /* step 1.  */
 506       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 507       add_phi_arg (phi_new, def, new_loop_entry_e);
 508
 509       /* step 2.  */
 510       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 511       if (TREE_CODE (def) != SSA_NAME)
 512         continue;
 513
 514       new_name_ptr = SSA_NAME_AUX (def);
 515       if (!new_name_ptr)
 516         /* Something defined outside of the loop.  */
 517         continue;
 518
 519       /* An ordinary ssa name defined in the loop.  */
 520       new_ssa_name = *new_name_ptr;
 521       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 522
 523       /* step 3 (case 1).  */
 524       if (!after)
 525         {
 526           gcc_assert (new_loop_exit_e == orig_entry_e);
 527           SET_PHI_ARG_DEF (phi_orig,
 528                            phi_arg_from_edge (phi_orig, new_loop_exit_e),
 529                            new_ssa_name);
 530         }
 531     }
 532 }
 533
 534
 535 /* Update PHI nodes for a guard of the LOOP.
 536
 537    Input:
 538    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 539         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 540         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 541         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 542         We denote this bb NEW_MERGE_BB because it had a single predecessor (the
 543         LOOP header) before the guard code was added, and now it became a merge
 544         point of two paths - the path that ends with the LOOP exit-edge, and
 545         the path that ends with GUARD_EDGE.
 546
 547         This function creates and updates the relevant phi nodes to account for
 548         the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
 549         1. Create phi nodes at NEW_MERGE_BB.
 550         2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 551            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 552            was added:
 553
 554         ===> The CFG before the guard-code was added:
 555         LOOP_header_bb:
 556           if (exit_loop) goto update_bb : LOOP_header_bb
 557         update_bb:
 558
 559         ==> The CFG after the guard-code was added:
 560         guard_bb:
 561           if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
 562         LOOP_header_bb:
 563           if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
 564         new_merge_bb:
 565           goto update_bb
 566         update_bb:
 567
 568    - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
 569         UPDATE_BB are loop entry phis, like the phis in the LOOP header,
 570         organized in the same order.
 571         If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
 572         loop exit phis.
 573
 574    - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
 575         "original" loop).  FALSE if LOOP is an original loop (not a newly
 576         created copy).  The SSA_NAME_AUX fields of the defs in the original
 577         loop are the corresponding new ssa-names used in the new duplicated
 578         loop copy.  IS_NEW_LOOP indicates which of the two args of the phi
 579         nodes in UPDATE_BB takes the original ssa-name, and which takes the
 580         new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
 581         the LOOP-exit-edge takes the new-name, and the phi-arg that is
 582         associated with GUARD_EDGE takes the original name.  If IS_NEW_LOOP is
 583         FALSE, it's the other way around.
 584   */
 585
 586 static void
 587 slpeel_update_phi_nodes_for_guard (edge guard_edge,
 588                                    struct loop *loop,
 589                                    bool entry_phis,
 590                                    bool is_new_loop)
 591 {
 592   tree orig_phi, new_phi, update_phi;
 593   tree guard_arg, loop_arg;
 594   basic_block new_merge_bb = guard_edge->dest;
 595   edge e = EDGE_SUCC (new_merge_bb, 0);
 596   basic_block update_bb = e->dest;
 597   basic_block orig_bb = (entry_phis ? loop->header : update_bb);
 598
 599   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 600        orig_phi && update_phi;
 601        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 602     {
 603       /* 1. Generate new phi node in NEW_MERGE_BB:  */
 604       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 605                                  new_merge_bb);
 606
 607       /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 608             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 609       if (entry_phis)
 610         {
 611           loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
 612                                             EDGE_SUCC (loop->latch, 0));
 613           guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
 614         }
 615       else /* exit phis */
 616         {
 617           tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 618           tree *new_name_ptr = SSA_NAME_AUX (orig_def);
 619           tree new_name;
 620
 621           if (new_name_ptr)
 622             new_name = *new_name_ptr;
 623           else
 624             /* Something defined outside of the loop  */
 625             new_name = orig_def;
 626
 627           if (is_new_loop)
 628             {
 629               guard_arg = orig_def;
 630               loop_arg = new_name;
 631             }
 632           else
 633             {
 634               guard_arg = new_name;
 635               loop_arg = orig_def;
 636             }
 637         }
 638       add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
 639       add_phi_arg (new_phi, guard_arg, guard_edge);
 640
 641       /* 3. Update phi in successor block.  */
 642       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 643                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 644       SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
 645                        PHI_RESULT (new_phi));
 646     }
 647
 648   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 649 }
 650
 651
 652 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 653    that starts at zero, increases by one and its limit is NITERS.
 654
 655    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 656
 657 static void
 658 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 659 {
 660   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 661   tree orig_cond;
 662   edge exit_edge = loop->exit_edges[0];
 663   block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
 664   tree begin_label = tree_block_label (loop->latch);
 665   tree exit_label = tree_block_label (loop->single_exit->dest);
 666   tree init = build_int_cst (TREE_TYPE (niters), 0);
 667   tree step = build_int_cst (TREE_TYPE (niters), 1);
 668
 669   orig_cond = get_loop_exit_condition (loop);
 670   gcc_assert (orig_cond);
 671   create_iv (init, step, NULL_TREE, loop,
 672              &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
 673
 674   /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
 675      back to the exit condition statement.  */
 676   bsi_next (&loop_exit_bsi);
 677   gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
 678
 679   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 680     cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 681   else /* 'then' edge loops back.  */
 682     cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 683
 684   begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 685   exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 686   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 687                      begin_label, exit_label);
 688   bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
 689
 690   /* Remove old loop exit test:  */
 691   bsi_remove (&loop_exit_bsi);
 692
 693   if (vect_debug_stats (loop) || vect_debug_details (loop))
 694     print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 695
 696   loop->nb_iterations = niters;
 697 }
 698
 699
 700 /* Given LOOP this function generates a new copy of it and puts it
 701    on E which is either the entry or exit of LOOP.  */
 702
 703 static struct loop *
 704 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 705                                         edge e)
 706 {
 707   struct loop *new_loop;
 708   basic_block *new_bbs, *bbs;
 709   bool at_exit;
 710   bool was_imm_dom;
 711   basic_block exit_dest;
 712   tree phi, phi_arg;
 713
 714   at_exit = (e == loop->exit_edges[0]);
 715   if (!at_exit && e != loop_preheader_edge (loop))
 716     {
 717       if (dump_file && (dump_flags & TDF_DETAILS))
 718           fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
 719       return NULL;
 720     }
 721
 722   bbs = get_loop_body (loop);
 723
 724   /* Check whether duplication is possible.  */
 725   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 726     {
 727       if (vect_debug_stats (loop) || vect_debug_details (loop))
 728           fprintf (dump_file, "Cannot copy basic blocks.\n");
 729       free (bbs);
 730       return NULL;
 731     }
 732
 733   /* Generate new loop structure.  */
 734   new_loop = duplicate_loop (loops, loop, loop->outer);
 735   if (!new_loop)
 736     {
 737       if (vect_debug_stats (loop) || vect_debug_details (loop))
 738           fprintf (dump_file, "duplicate_loop returns NULL.\n");
 739       free (bbs);
 740       return NULL;
 741     }
 742
 743   exit_dest = loop->exit_edges[0]->dest;
 744   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 745                                           exit_dest) == loop->header ?
 746                  true : false);
 747
 748   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 749
 750   copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
 751
 752   /* Duplicating phi args at exit bbs as coming
 753      also from exit of duplicated loop.  */
 754   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 755     {
 756       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
 757       if (phi_arg)
 758         {
 759           edge new_loop_exit_edge;
 760
 761           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 762             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 763           else
 764             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 765
 766           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 767         }
 768     }
 769
 770   if (at_exit) /* Add the loop copy at exit.  */
 771     {
 772       redirect_edge_and_branch_force (e, new_loop->header);
 773       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 774       if (was_imm_dom)
 775         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 776     }
 777   else /* Add the copy at entry.  */
 778     {
 779       edge new_exit_e;
 780       edge entry_e = loop_preheader_edge (loop);
 781       basic_block preheader = entry_e->src;
 782
 783       if (!flow_bb_inside_loop_p (new_loop,
 784                                   EDGE_SUCC (new_loop->header, 0)->dest))
 785         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 786       else
 787         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 788
 789       redirect_edge_and_branch_force (new_exit_e, loop->header);
 790       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 791                                new_exit_e->src);
 792
 793       /* We have to add phi args to the loop->header here as coming
 794          from new_exit_e edge.  */
 795       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 796         {
 797           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 798           if (phi_arg)
 799             add_phi_arg (phi, phi_arg, new_exit_e);
 800         }
 801
 802       redirect_edge_and_branch_force (entry_e, new_loop->header);
 803       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 804     }
 805
 806   flow_loop_scan (new_loop, LOOP_ALL);
 807   flow_loop_scan (loop, LOOP_ALL);
 808   free (new_bbs);
 809   free (bbs);
 810
 811   return new_loop;
 812 }
 813
 814
 815 /* Given the condition statement COND, put it as the last statement
 816    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 817    Assumes that this is the single exit of the guarded loop.
 818    Returns the skip edge.  */
 819
 820 static edge
 821 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 822                         basic_block dom_bb)
 823 {
 824   block_stmt_iterator bsi;
 825   edge new_e, enter_e;
 826   tree cond_stmt, then_label, else_label;
 827
 828   enter_e = EDGE_SUCC (guard_bb, 0);
 829   enter_e->flags &= ~EDGE_FALLTHRU;
 830   enter_e->flags |= EDGE_FALSE_VALUE;
 831   bsi = bsi_last (guard_bb);
 832
 833   then_label = build1 (GOTO_EXPR, void_type_node,
 834                        tree_block_label (exit_bb));
 835   else_label = build1 (GOTO_EXPR, void_type_node,
 836                        tree_block_label (enter_e->dest));
 837   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 838                      then_label, else_label);
 839   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 840   /* Add new edge to connect entry block to the second loop.  */
 841   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 842   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 843   return new_e;
 844 }
 845
 846
 847 /* This function verifies that the following restrictions apply to LOOP:
 848    (1) it is innermost
 849    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 850    (3) it is single entry, single exit
 851    (4) its exit condition is the last stmt in the header
 852    (5) E is the entry/exit edge of LOOP.
 853  */
 854
 855 static bool
 856 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 857 {
 858   edge exit_e = loop->exit_edges [0];
 859   edge entry_e = loop_preheader_edge (loop);
 860   tree orig_cond = get_loop_exit_condition (loop);
 861   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 862
 863   if (any_marked_for_rewrite_p ())
 864     return false;
 865
 866   if (loop->inner
 867       /* All loops have an outer scope; the only case loop->outer is NULL is for
 868          the function itself.  */
 869       || !loop->outer
 870       || loop->num_nodes != 2
 871       || !empty_block_p (loop->latch)
 872       || loop->num_exits != 1
 873       || loop->num_entries != 1
 874       /* Verify that new loop exit condition can be trivially modified.  */
 875       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 876       || (e != exit_e && e != entry_e))
 877     return false;
 878
 879   return true;
 880 }
 881
 882 #ifdef ENABLE_CHECKING
 883 static void
 884 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 885                                  struct loop *second_loop)
 886 {
 887   basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
 888   basic_block loop2_entry_bb = second_loop->pre_header;
 889   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 890
 891   /* A guard that controls whether the second_loop is to be executed or skipped
 892      is placed in first_loop->exit.  first_loopt->exit therefore has two
 893      successors - one is the preheader of second_loop, and the other is a bb
 894      after second_loop.
 895    */
 896   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 897
 898
 899   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
 900         of second_loop.  */
 901
 902   /* The preheader of new_loop is expected to have two predessors:
 903      first_loop->exit and the block that precedes first_loop.  */
 904
 905   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 906               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 907                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 908                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 909                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 910
 911   /* Verify that the other successor of first_loopt->exit is after the
 912      second_loop.  */
 913   /* TODO */
 914 }
 915 #endif
 916
 917 /* Function slpeel_tree_peel_loop_to_edge.
 918
 919    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 920    that is placed on the entry (exit) edge E of LOOP. After this transformation
 921    we have two loops one after the other - first-loop iterates FIRST_NITERS
 922    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 923
 924    Input:
 925    - LOOP: the loop to be peeled.
 926    - E: the exit or entry edge of LOOP.
 927         If it is the entry edge, we peel the first iterations of LOOP. In this
 928         case first-loop is LOOP, and second-loop is the newly created loop.
 929         If it is the exit edge, we peel the last iterations of LOOP. In this
 930         case, first-loop is the newly created loop, and second-loop is LOOP.
 931    - NITERS: the number of iterations that LOOP iterates.
 932    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
 933    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
 934         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
 935         is false, the caller of this function may want to take care of this
 936         (this can be useful if we don't want new stmts added to first-loop).
 937
 938    Output:
 939    The function returns a pointer to the new loop-copy, or NULL if it failed
 940    to perform the transformation.
 941
 942    The function generates two if-then-else guards: one before the first loop,
 943    and the other before the second loop:
 944    The first guard is:
 945      if (FIRST_NITERS == 0) then skip the first loop,
 946      and go directly to the second loop.
 947    The second guard is:
 948      if (FIRST_NITERS == NITERS) then skip the second loop.
 949
 950    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
 951    FORNOW the resulting code will not be in loop-closed-ssa form.
 952 */
 953
 954 struct loop*
 955 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
 956                                edge e, tree first_niters,
 957                                tree niters, bool update_first_loop_count)
 958 {
 959   struct loop *new_loop = NULL, *first_loop, *second_loop;
 960   edge skip_e;
 961   tree pre_condition;
 962   bitmap definitions;
 963   basic_block bb_before_second_loop, bb_after_second_loop;
 964   basic_block bb_before_first_loop;
 965   basic_block bb_between_loops;
 966   edge exit_e = loop->exit_edges [0];
 967
 968   if (!slpeel_can_duplicate_loop_p (loop, e))
 969     return NULL;
 970
 971   /* We have to initialize cfg_hooks. Then, when calling
 972    cfg_hooks->split_edge, the function tree_split_edge
 973    is actually called and, when calling cfg_hooks->duplicate_block,
 974    the function tree_duplicate_bb is called.  */
 975   tree_register_cfg_hooks ();
 976
 977
 978   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
 979         Resulting CFG would be:
 980
 981         first_loop:
 982         do {
 983         } while ...
 984
 985         second_loop:
 986         do {
 987         } while ...
 988
 989         orig_exit_bb:
 990    */
 991
 992   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
 993     {
 994       if (vect_debug_stats (loop) || vect_debug_details (loop))
 995         fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
 996       return NULL;
 997     }
 998
 999   if (e == exit_e)
1000     {
1001       /* NEW_LOOP was placed after LOOP.  */
1002       first_loop = loop;
1003       second_loop = new_loop;
1004     }
1005   else
1006     {
1007       /* NEW_LOOP was placed before LOOP.  */
1008       first_loop = new_loop;
1009       second_loop = loop;
1010     }
1011
1012   definitions = marked_ssa_names ();
1013   allocate_new_names (definitions);
1014   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1015   rename_variables_in_loop (new_loop);
1016
1017
1018   /* 2. Add the guard that controls whether the first loop is executed.
1019         Resulting CFG would be:
1020
1021         bb_before_first_loop:
1022         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1023                                GOTO first-loop
1024
1025         first_loop:
1026         do {
1027         } while ...
1028
1029         bb_before_second_loop:
1030
1031         second_loop:
1032         do {
1033         } while ...
1034
1035         orig_exit_bb:
1036    */
1037
1038   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1039   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1040   bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1041   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1042   flow_loop_scan (first_loop, LOOP_ALL);
1043   flow_loop_scan (second_loop, LOOP_ALL);
1044
1045   pre_condition =
1046         build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1047   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1048                                   bb_before_second_loop, bb_before_first_loop);
1049   slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1050                                      first_loop == new_loop);
1051
1052
1053   /* 3. Add the guard that controls whether the second loop is executed.
1054         Resulting CFG would be:
1055
1056         bb_before_first_loop:
1057         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1058                                GOTO first-loop
1059
1060         first_loop:
1061         do {
1062         } while ...
1063
1064         bb_between_loops:
1065         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1066                                     GOTO bb_before_second_loop
1067
1068         bb_before_second_loop:
1069
1070         second_loop:
1071         do {
1072         } while ...
1073
1074         bb_after_second_loop:
1075
1076         orig_exit_bb:
1077    */
1078
1079   bb_between_loops = split_edge (first_loop->exit_edges[0]);
1080   add_bb_to_loop (bb_between_loops, first_loop->outer);
1081   bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1082   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1083   flow_loop_scan (first_loop, LOOP_ALL);
1084   flow_loop_scan (second_loop, LOOP_ALL);
1085
1086   pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1087   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1088                                   bb_after_second_loop, bb_before_first_loop);
1089   slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1090                                      second_loop == new_loop);
1091
1092   /* Flow loop scan does not update loop->single_exit field.  */
1093   first_loop->single_exit = first_loop->exit_edges[0];
1094   second_loop->single_exit = second_loop->exit_edges[0];
1095
1096   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1097    */
1098   if (update_first_loop_count)
1099     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1100
1101   free_new_names (definitions);
1102   BITMAP_XFREE (definitions);
1103   unmark_all_for_rewrite ();
1104
1105   return new_loop;
1106 }
1107
1108 \f
1109 /* Here the proper Vectorizer starts.  */
1110
1111 /*************************************************************************
1112   Vectorization Utilities.
1113  *************************************************************************/
1114
1115 /* Function new_stmt_vec_info.
1116
1117    Create and initialize a new stmt_vec_info struct for STMT.  */
1118
1119 stmt_vec_info
1120 new_stmt_vec_info (tree stmt, struct loop *loop)
1121 {
1122   stmt_vec_info res;
1123   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1124
1125   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1126   STMT_VINFO_STMT (res) = stmt;
1127   STMT_VINFO_LOOP (res) = loop;
1128   STMT_VINFO_RELEVANT_P (res) = 0;
1129   STMT_VINFO_VECTYPE (res) = NULL;
1130   STMT_VINFO_VEC_STMT (res) = NULL;
1131   STMT_VINFO_DATA_REF (res) = NULL;
1132   STMT_VINFO_MEMTAG (res) = NULL;
1133   STMT_VINFO_VECT_DR_BASE (res) = NULL;
1134
1135   return res;
1136 }
1137
1138
1139 /* Function new_loop_vec_info.
1140
1141    Create and initialize a new loop_vec_info struct for LOOP, as well as
1142    stmt_vec_info structs for all the stmts in LOOP.  */
1143
1144 loop_vec_info
1145 new_loop_vec_info (struct loop *loop)
1146 {
1147   loop_vec_info res;
1148   basic_block *bbs;
1149   block_stmt_iterator si;
1150   unsigned int i;
1151
1152   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1153
1154   bbs = get_loop_body (loop);
1155
1156   /* Create stmt_info for all stmts in the loop.  */
1157   for (i = 0; i < loop->num_nodes; i++)
1158     {
1159       basic_block bb = bbs[i];
1160       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1161         {
1162           tree stmt = bsi_stmt (si);
1163           stmt_ann_t ann;
1164
1165           get_stmt_operands (stmt);
1166           ann = stmt_ann (stmt);
1167           set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1168         }
1169     }
1170
1171   LOOP_VINFO_LOOP (res) = loop;
1172   LOOP_VINFO_BBS (res) = bbs;
1173   LOOP_VINFO_EXIT_COND (res) = NULL;
1174   LOOP_VINFO_NITERS (res) = NULL;
1175   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1176   LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1177   LOOP_VINFO_VECT_FACTOR (res) = 0;
1178   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1179                            "loop_write_datarefs");
1180   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1181                            "loop_read_datarefs");
1182   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1183
1184   return res;
1185 }
1186
1187
1188 /* Function destroy_loop_vec_info.
1189
1190    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1191    stmts in the loop.  */
1192
1193 void
1194 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1195 {
1196   struct loop *loop;
1197   basic_block *bbs;
1198   int nbbs;
1199   block_stmt_iterator si;
1200   int j;
1201
1202   if (!loop_vinfo)
1203     return;
1204
1205   loop = LOOP_VINFO_LOOP (loop_vinfo);
1206
1207   bbs = LOOP_VINFO_BBS (loop_vinfo);
1208   nbbs = loop->num_nodes;
1209
1210   for (j = 0; j < nbbs; j++)
1211     {
1212       basic_block bb = bbs[j];
1213       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1214         {
1215           tree stmt = bsi_stmt (si);
1216           stmt_ann_t ann = stmt_ann (stmt);
1217           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1218           free (stmt_info);
1219           set_stmt_info (ann, NULL);
1220         }
1221     }
1222
1223   free (LOOP_VINFO_BBS (loop_vinfo));
1224   varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1225   varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1226
1227   free (loop_vinfo);
1228 }
1229
1230
1231 /* Function debug_loop_stats.
1232
1233    For vectorization statistics dumps.  */
1234
1235 static bool
1236 vect_debug_stats (struct loop *loop)
1237 {
1238   basic_block bb;
1239   block_stmt_iterator si;
1240   tree node = NULL_TREE;
1241
1242   if (!dump_file || !(dump_flags & TDF_STATS))
1243     return false;
1244
1245   if (!loop)
1246     {
1247       fprintf (dump_file, "\n");
1248       return true;
1249     }
1250
1251   if (!loop->header)
1252     return false;
1253
1254   bb = loop->header;
1255
1256   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1257     {
1258       node = bsi_stmt (si);
1259       if (node && EXPR_P (node) && EXPR_LOCUS (node))
1260         break;
1261     }
1262
1263   if (node && EXPR_P (node) && EXPR_LOCUS (node)
1264       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1265     {
1266       fprintf (dump_file, "\nloop at %s:%d: ",
1267         EXPR_FILENAME (node), EXPR_LINENO (node));
1268       return true;
1269     }
1270
1271   return false;
1272 }
1273
1274
1275 /* Function debug_loop_details.
1276
1277    For vectorization debug dumps.  */
1278
1279 static bool
1280 vect_debug_details (struct loop *loop)
1281 {
1282    basic_block bb;
1283    block_stmt_iterator si;
1284    tree node = NULL_TREE;
1285
1286   if (!dump_file || !(dump_flags & TDF_DETAILS))
1287     return false;
1288
1289   if (!loop)
1290     {
1291       fprintf (dump_file, "\n");
1292       return true;
1293     }
1294
1295   if (!loop->header)
1296     return false;
1297
1298   bb = loop->header;
1299
1300   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1301     {
1302       node = bsi_stmt (si);
1303       if (node && EXPR_P (node) && EXPR_LOCUS (node))
1304         break;
1305     }
1306
1307   if (node && EXPR_P (node) && EXPR_LOCUS (node)
1308       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1309     {
1310       fprintf (dump_file, "\nloop at %s:%d: ",
1311                EXPR_FILENAME (node), EXPR_LINENO (node));
1312       return true;
1313     }
1314
1315   return false;
1316 }
1317
1318
1319 /* Function vect_get_ptr_offset
1320
1321    Compute the OFFSET modulo vector-type alignment of pointer REF in bits.  */
1322
1323 static tree
1324 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1325                      tree vectype ATTRIBUTE_UNUSED,
1326                      tree *offset ATTRIBUTE_UNUSED)
1327 {
1328   /* TODO: Use alignment information.  */
1329   return NULL_TREE;
1330 }
1331
1332
1333 /* Function vect_get_base_and_bit_offset
1334
1335    Return the BASE of the data reference EXPR.
1336    If VECTYPE is given, also compute the OFFSET from BASE in bits.
1337    E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1338    bits of 'a.b[i] + 4B' from a.
1339
1340    Input:
1341    EXPR - the memory reference that is being analyzed
1342    DR - the data_reference struct of the _original_ memory reference
1343         (Note: DR_REF (DR) is not necessarily EXPR)
1344    VECTYPE - the type that defines the alignment (i.e, we compute
1345              alignment relative to TYPE_ALIGN(VECTYPE))
1346
1347    Output:
1348    BASE (returned value) - the base of the data reference EXPR.
1349                            E.g, if EXPR is a.b[k].c[i][j] the returned
1350                            base is a.
1351    OFFSET - offset of EXPR from BASE in bits
1352    BASE_ALIGNED_P - indicates if BASE is aligned
1353
1354    If something unexpected is encountered (an unsupported form of data-ref),
1355    or if VECTYPE is given but OFFSET cannot be determined:
1356    then NULL_TREE is returned.  */
1357
1358 static tree
1359 vect_get_base_and_bit_offset (struct data_reference *dr,
1360                               tree expr,
1361                               tree vectype,
1362                               loop_vec_info loop_vinfo,
1363                               tree *offset,
1364                               bool *base_aligned_p)
1365 {
1366   tree this_offset = size_zero_node;
1367   tree base = NULL_TREE;
1368   tree next_ref;
1369   tree oprnd0, oprnd1;
1370   struct data_reference *array_dr;
1371   enum tree_code code = TREE_CODE (expr);
1372
1373   *base_aligned_p = false;
1374
1375   switch (code)
1376     {
1377     /* These cases end the recursion:  */
1378     case VAR_DECL:
1379       *offset = size_zero_node;
1380       if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1381         *base_aligned_p = true;
1382       return expr;
1383
1384     case SSA_NAME:
1385       if (!vectype)
1386         return expr;
1387
1388       if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1389         return NULL_TREE;
1390
1391       if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1392         {
1393           base = vect_get_ptr_offset (expr, vectype, offset);
1394           if (base)
1395             *base_aligned_p = true;
1396         }
1397       else
1398         {
1399           *base_aligned_p = true;
1400           *offset = size_zero_node;
1401           base = expr;
1402         }
1403       return base;
1404
1405     case INTEGER_CST:
1406       *offset = int_const_binop (MULT_EXPR, expr,
1407                                  build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1408       return expr;
1409
1410     /* These cases continue the recursion:  */
1411     case COMPONENT_REF:
1412       oprnd0 = TREE_OPERAND (expr, 0);
1413       oprnd1 = TREE_OPERAND (expr, 1);
1414
1415       this_offset = bit_position (oprnd1);
1416       if (vectype && !host_integerp (this_offset, 1))
1417         return NULL_TREE;
1418       next_ref = oprnd0;
1419       break;
1420
1421     case ADDR_EXPR:
1422       oprnd0 = TREE_OPERAND (expr, 0);
1423       next_ref = oprnd0;
1424       break;
1425
1426     case INDIRECT_REF:
1427       oprnd0 = TREE_OPERAND (expr, 0);
1428       next_ref = oprnd0;
1429       break;
1430
1431     case ARRAY_REF:
1432       if (DR_REF (dr) != expr)
1433         /* Build array data_reference struct if the existing DR_REF
1434            doesn't match EXPR. This happens, for example, when the
1435            EXPR is *T and T is initialized to &arr[indx]. The DR struct
1436            contains information on the access of T, not of arr. In order
1437            to continue  the analysis, we create a new DR struct that
1438            describes the access of arr.
1439         */
1440         array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1441       else
1442         array_dr = dr;
1443
1444       next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1445                                                    vectype, &this_offset);
1446       if (!next_ref)
1447         return NULL_TREE;
1448
1449       if (vectype &&
1450           TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1451         {
1452           *offset = this_offset;
1453           *base_aligned_p = true;
1454           return next_ref;
1455         }
1456       break;
1457
1458     case PLUS_EXPR:
1459     case MINUS_EXPR:
1460       /* In case we have a PLUS_EXPR of the form
1461          (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1462          This is verified in  vect_get_symbl_and_dr.  */
1463       oprnd0 = TREE_OPERAND (expr, 0);
1464       oprnd1 = TREE_OPERAND (expr, 1);
1465
1466       base = vect_get_base_and_bit_offset
1467         (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1468       if (vectype && !base)
1469         return NULL_TREE;
1470
1471       next_ref = oprnd0;
1472       break;
1473
1474     default:
1475       return NULL_TREE;
1476     }
1477
1478   base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1479                                        loop_vinfo, offset, base_aligned_p);
1480
1481   if (vectype && base)
1482     {
1483       *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1484       if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1485         return NULL_TREE;
1486
1487       if (vect_debug_details (NULL))
1488         {
1489           print_generic_expr (dump_file, expr, TDF_SLIM);
1490           fprintf (dump_file, " --> total offset for ref: ");
1491           print_generic_expr (dump_file, *offset, TDF_SLIM);
1492         }
1493     }
1494   return base;
1495 }
1496
1497
1498 /* Function vect_force_dr_alignment_p.
1499
1500    Returns whether the alignment of a DECL can be forced to be aligned
1501    on ALIGNMENT bit boundary.  */
1502
1503 static bool
1504 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1505 {
1506   if (TREE_CODE (decl) != VAR_DECL)
1507     return false;
1508
1509   if (DECL_EXTERNAL (decl))
1510     return false;
1511
1512   if (TREE_STATIC (decl))
1513     return (alignment <= MAX_OFILE_ALIGNMENT);
1514   else
1515     /* This is not 100% correct.  The absolute correct stack alignment
1516        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1517        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1518        However, until someone implements forced stack alignment, SSE
1519        isn't really usable without this.  */
1520     return (alignment <= PREFERRED_STACK_BOUNDARY);
1521 }
1522
1523
1524 /* Function vect_get_new_vect_var.
1525
1526    Returns a name for a new variable. The current naming scheme appends the
1527    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1528    the name of vectorizer generated variables, and appends that to NAME if
1529    provided.  */
1530
1531 static tree
1532 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1533 {
1534   const char *prefix;
1535   int prefix_len;
1536   tree new_vect_var;
1537
1538   if (var_kind == vect_simple_var)
1539     prefix = "vect_";
1540   else
1541     prefix = "vect_p";
1542
1543   prefix_len = strlen (prefix);
1544
1545   if (name)
1546     new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1547   else
1548     new_vect_var = create_tmp_var (type, prefix);
1549
1550   return new_vect_var;
1551 }
1552
1553
1554 /* Function vect_create_index_for_vector_ref.
1555
1556    Create (and return) an index variable, along with it's update chain in the
1557    loop. This variable will be used to access a memory location in a vector
1558    operation.
1559
1560    Input:
1561    LOOP: The loop being vectorized.
1562    BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1563         function can be added here, or in the loop pre-header.
1564
1565    Output:
1566    Return an index that will be used to index a vector array.  It is expected
1567    that a pointer to the first vector will be used as the base address for the
1568    indexed reference.
1569
1570    FORNOW: we are not trying to be efficient, just creating a new index each
1571    time from scratch.  At this time all vector references could use the same
1572    index.
1573
1574    TODO: create only one index to be used by all vector references.  Record
1575    the index in the LOOP_VINFO the first time this procedure is called and
1576    return it on subsequent calls.  The increment of this index must be placed
1577    just before the conditional expression that ends the single block loop.  */
1578
1579 static tree
1580 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1581 {
1582   tree init, step;
1583   tree indx_before_incr, indx_after_incr;
1584
1585   /* It is assumed that the base pointer used for vectorized access contains
1586      the address of the first vector.  Therefore the index used for vectorized
1587      access must be initialized to zero and incremented by 1.  */
1588
1589   init = integer_zero_node;
1590   step = integer_one_node;
1591
1592   /* Assuming that bsi_insert is used with BSI_NEW_STMT  */
1593   create_iv (init, step, NULL_TREE, loop, bsi, false,
1594         &indx_before_incr, &indx_after_incr);
1595
1596   return indx_before_incr;
1597 }
1598
1599
1600 /* Function vect_create_addr_base_for_vector_ref.
1601
1602    Create an expression that computes the address of the first memory location
1603    that will be accessed for a data reference.
1604
1605    Input:
1606    STMT: The statement containing the data reference.
1607    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1608    OFFSET: Optional. If supplied, it is be added to the initial address.
1609
1610    Output:
1611    1. Return an SSA_NAME whose value is the address of the memory location of
1612       the first vector of the data reference.
1613    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1614       these statement(s) which define the returned SSA_NAME.
1615
1616    FORNOW: We are only handling array accesses with step 1.  */
1617
1618 static tree
1619 vect_create_addr_base_for_vector_ref (tree stmt,
1620                                       tree *new_stmt_list,
1621                                       tree offset)
1622 {
1623   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1624   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1625   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1626   tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1627   tree base_name = unshare_expr (DR_BASE_NAME (dr));
1628   tree ref = DR_REF (dr);
1629   tree data_ref_base_type = TREE_TYPE (data_ref_base);
1630   tree scalar_type = TREE_TYPE (ref);
1631   tree scalar_ptr_type = build_pointer_type (scalar_type);
1632   tree access_fn;
1633   tree init_val, step, init_oval;
1634   bool ok;
1635   bool is_ptr_ref, is_array_ref, is_addr_expr;
1636   tree array_base;
1637   tree vec_stmt;
1638   tree new_temp;
1639   tree array_ref;
1640   tree addr_base, addr_expr;
1641   tree dest, new_stmt;
1642
1643   /* Only the access function of the last index is relevant (i_n in
1644      a[i_1][i_2]...[i_n]), the others correspond to loop invariants.  */
1645   access_fn = DR_ACCESS_FN (dr, 0);
1646   ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1647                                     true);
1648   if (!ok)
1649     init_oval = integer_zero_node;
1650
1651   is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1652                && TREE_CODE (data_ref_base) == SSA_NAME;
1653   is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1654   is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1655                  || TREE_CODE (data_ref_base) == PLUS_EXPR
1656                  || TREE_CODE (data_ref_base) == MINUS_EXPR;
1657   gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1658
1659   /** Create: &(base[init_val])
1660
1661       if data_ref_base is an ARRAY_TYPE:
1662          base = data_ref_base
1663
1664       if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1665          base = *((scalar_array *) data_ref_base)
1666    **/
1667
1668   if (is_array_ref)
1669     array_base = data_ref_base;
1670   else /* is_ptr_ref  or is_addr_expr */
1671     {
1672       /* array_ptr = (scalar_array_ptr_type *) data_ref_base;  */
1673       tree scalar_array_type = build_array_type (scalar_type, 0);
1674       tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1675       tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1676       add_referenced_tmp_var (array_ptr);
1677
1678       dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1679       add_referenced_tmp_var (dest);
1680       data_ref_base =
1681         force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1682       append_to_statement_list_force (new_stmt, new_stmt_list);
1683
1684       vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1685       vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1686       new_temp = make_ssa_name (array_ptr, vec_stmt);
1687       TREE_OPERAND (vec_stmt, 0) = new_temp;
1688       append_to_statement_list_force (vec_stmt, new_stmt_list);
1689
1690       /* (*array_ptr)  */
1691       array_base = build_fold_indirect_ref (new_temp);
1692     }
1693
1694   dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1695   add_referenced_tmp_var (dest);
1696   init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1697   append_to_statement_list_force (new_stmt, new_stmt_list);
1698
1699   if (offset)
1700     {
1701       tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1702       add_referenced_tmp_var (tmp);
1703       vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1704       vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1705       init_val = make_ssa_name (tmp, vec_stmt);
1706       TREE_OPERAND (vec_stmt, 0) = init_val;
1707       append_to_statement_list_force (vec_stmt, new_stmt_list);
1708     }
1709
1710   array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1711                       NULL_TREE, NULL_TREE);
1712   addr_base = build_fold_addr_expr (array_ref);
1713
1714   /* addr_expr = addr_base */
1715   addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1716                                      get_name (base_name));
1717   add_referenced_tmp_var (addr_expr);
1718   vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1719   new_temp = make_ssa_name (addr_expr, vec_stmt);
1720   TREE_OPERAND (vec_stmt, 0) = new_temp;
1721   append_to_statement_list_force (vec_stmt, new_stmt_list);
1722
1723   return new_temp;
1724 }
1725
1726
1727 /* Function get_vectype_for_scalar_type.
1728
1729    Returns the vector type corresponding to SCALAR_TYPE as supported
1730    by the target.  */
1731
1732 static tree
1733 get_vectype_for_scalar_type (tree scalar_type)
1734 {
1735   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1736   int nbytes = GET_MODE_SIZE (inner_mode);
1737   int nunits;
1738   tree vectype;
1739
1740   if (nbytes == 0)
1741     return NULL_TREE;
1742
1743   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1744      is expected.  */
1745   nunits = UNITS_PER_SIMD_WORD / nbytes;
1746
1747   vectype = build_vector_type (scalar_type, nunits);
1748   if (vect_debug_details (NULL))
1749     {
1750       fprintf (dump_file, "get vectype with %d units of type ", nunits);
1751       print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1752     }
1753
1754   if (!vectype)
1755     return NULL_TREE;
1756
1757   if (vect_debug_details (NULL))
1758     {
1759       fprintf (dump_file, "vectype: ");
1760       print_generic_expr (dump_file, vectype, TDF_SLIM);
1761     }
1762
1763   if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1764     {
1765       /* TODO: tree-complex.c sometimes can parallelize operations
1766          on generic vectors.  We can vectorize the loop in that case,
1767          but then we should re-run the lowering pass.  */
1768       if (vect_debug_details (NULL))
1769         fprintf (dump_file, "mode not supported by target.");
1770       return NULL_TREE;
1771     }
1772
1773   return vectype;
1774 }
1775
1776
1777 /* Function vect_align_data_ref.
1778
1779    Handle mislignment of a memory accesses.
1780
1781    FORNOW: Can't handle misaligned accesses.
1782    Make sure that the dataref is aligned.  */
1783
1784 static void
1785 vect_align_data_ref (tree stmt)
1786 {
1787   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1788   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1789
1790   /* FORNOW: can't handle misaligned accesses;
1791              all accesses expected to be aligned.  */
1792   gcc_assert (aligned_access_p (dr));
1793 }
1794
1795
1796 /* Function vect_create_data_ref_ptr.
1797
1798    Create a memory reference expression for vector access, to be used in a
1799    vector load/store stmt. The reference is based on a new pointer to vector
1800    type (vp).
1801
1802    Input:
1803    1. STMT: a stmt that references memory. Expected to be of the form
1804          MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1805    2. BSI: block_stmt_iterator where new stmts can be added.
1806    3. OFFSET (optional): an offset to be added to the initial address accessed
1807         by the data-ref in STMT.
1808    4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1809         pointing to the initial address.
1810
1811    Output:
1812    1. Declare a new ptr to vector_type, and have it point to the base of the
1813       data reference (initial addressed accessed by the data reference).
1814       For example, for vector of type V8HI, the following code is generated:
1815
1816       v8hi *vp;
1817       vp = (v8hi *)initial_address;
1818
1819       if OFFSET is not supplied:
1820          initial_address = &a[init];
1821       if OFFSET is supplied:
1822          initial_address = &a[init + OFFSET];
1823
1824       Return the initial_address in INITIAL_ADDRESS.
1825
1826    2. Create a data-reference in the loop based on the new vector pointer vp,
1827       and using a new index variable 'idx' as follows:
1828
1829       vp' = vp + update
1830
1831       where if ONLY_INIT is true:
1832          update = zero
1833       and otherwise
1834          update = idx + vector_type_size
1835
1836       Return the pointer vp'.
1837
1838
1839    FORNOW: handle only aligned and consecutive accesses.  */
1840
1841 static tree
1842 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1843                           tree *initial_address, bool only_init)
1844 {
1845   tree base_name;
1846   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1847   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1848   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1849   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1850   tree vect_ptr_type;
1851   tree vect_ptr;
1852   tree tag;
1853   v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1854   v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1855   vuse_optype vuses = STMT_VUSE_OPS (stmt);
1856   int nvuses, nv_may_defs, nv_must_defs;
1857   int i;
1858   tree new_temp;
1859   tree vec_stmt;
1860   tree new_stmt_list = NULL_TREE;
1861   tree idx;
1862   edge pe = loop_preheader_edge (loop);
1863   basic_block new_bb;
1864   tree vect_ptr_init;
1865   tree vectype_size;
1866   tree ptr_update;
1867   tree data_ref_ptr;
1868   tree type, tmp, size;
1869
1870   base_name = unshare_expr (DR_BASE_NAME (dr));
1871   if (vect_debug_details (NULL))
1872     {
1873       tree data_ref_base = base_name;
1874       fprintf (dump_file, "create array_ref of type: ");
1875       print_generic_expr (dump_file, vectype, TDF_SLIM);
1876       if (TREE_CODE (data_ref_base) == VAR_DECL)
1877         fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1878       else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1879         fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1880       else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1881         fprintf (dump_file, "vectorizing a record based array ref: ");
1882       else if (TREE_CODE (data_ref_base) == SSA_NAME)
1883         fprintf (dump_file, "vectorizing a pointer ref: ");
1884       print_generic_expr (dump_file, base_name, TDF_SLIM);
1885     }
1886
1887   /** (1) Create the new vector-pointer variable:  **/
1888
1889   vect_ptr_type = build_pointer_type (vectype);
1890   vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1891                                     get_name (base_name));
1892   add_referenced_tmp_var (vect_ptr);
1893
1894
1895   /** (2) Handle aliasing information of the new vector-pointer:  **/
1896
1897   tag = STMT_VINFO_MEMTAG (stmt_info);
1898   gcc_assert (tag);
1899   get_var_ann (vect_ptr)->type_mem_tag = tag;
1900
1901   /* Mark for renaming all aliased variables
1902      (i.e, the may-aliases of the type-mem-tag).  */
1903   nvuses = NUM_VUSES (vuses);
1904   nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1905   nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1906   for (i = 0; i < nvuses; i++)
1907     {
1908       tree use = VUSE_OP (vuses, i);
1909       if (TREE_CODE (use) == SSA_NAME)
1910         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1911     }
1912   for (i = 0; i < nv_may_defs; i++)
1913     {
1914       tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1915       if (TREE_CODE (def) == SSA_NAME)
1916         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1917     }
1918   for (i = 0; i < nv_must_defs; i++)
1919     {
1920       tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1921       if (TREE_CODE (def) == SSA_NAME)
1922         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1923     }
1924
1925
1926   /** (3) Calculate the initial address the vector-pointer, and set
1927           the vector-pointer to point to it before the loop:  **/
1928
1929   /* Create: (&(base[init_val+offset]) in the loop preheader.  */
1930   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1931                                                    offset);
1932   pe = loop_preheader_edge (loop);
1933   new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1934   gcc_assert (!new_bb);
1935   *initial_address = new_temp;
1936
1937   /* Create: p = (vectype *) initial_base  */
1938   vec_stmt = fold_convert (vect_ptr_type, new_temp);
1939   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1940   new_temp = make_ssa_name (vect_ptr, vec_stmt);
1941   TREE_OPERAND (vec_stmt, 0) = new_temp;
1942   new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1943   gcc_assert (!new_bb);
1944   vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1945
1946
1947   /** (4) Handle the updating of the vector-pointer inside the loop: **/
1948
1949   if (only_init) /* No update in loop is required.  */
1950     return vect_ptr_init;
1951
1952   idx = vect_create_index_for_vector_ref (loop, bsi);
1953
1954   /* Create: update = idx * vectype_size  */
1955   tmp = create_tmp_var (integer_type_node, "update");
1956   add_referenced_tmp_var (tmp);
1957   size = TYPE_SIZE (vect_ptr_type);
1958   type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1959   ptr_update = create_tmp_var (type, "update");
1960   add_referenced_tmp_var (ptr_update);
1961   vectype_size = build_int_cst (integer_type_node,
1962                                 GET_MODE_SIZE (TYPE_MODE (vectype)));
1963   vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1964   vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
1965   new_temp = make_ssa_name (tmp, vec_stmt);
1966   TREE_OPERAND (vec_stmt, 0) = new_temp;
1967   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1968   vec_stmt = fold_convert (type, new_temp);
1969   vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1970   new_temp = make_ssa_name (ptr_update, vec_stmt);
1971   TREE_OPERAND (vec_stmt, 0) = new_temp;
1972   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1973
1974   /* Create: data_ref_ptr = vect_ptr_init + update  */
1975   vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1976   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1977   new_temp = make_ssa_name (vect_ptr, vec_stmt);
1978   TREE_OPERAND (vec_stmt, 0) = new_temp;
1979   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1980   data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1981
1982   return data_ref_ptr;
1983 }
1984
1985
1986 /* Function vect_create_destination_var.
1987
1988    Create a new temporary of type VECTYPE.  */
1989
1990 static tree
1991 vect_create_destination_var (tree scalar_dest, tree vectype)
1992 {
1993   tree vec_dest;
1994   const char *new_name;
1995
1996   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1997
1998   new_name = get_name (scalar_dest);
1999   if (!new_name)
2000     new_name = "var_";
2001   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2002   add_referenced_tmp_var (vec_dest);
2003
2004   return vec_dest;
2005 }
2006
2007
2008 /* Function vect_init_vector.
2009
2010    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2011    the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2012    used in the vectorization of STMT.  */
2013
2014 static tree
2015 vect_init_vector (tree stmt, tree vector_var)
2016 {
2017   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2018   struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2019   tree new_var;
2020   tree init_stmt;
2021   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2022   tree vec_oprnd;
2023   edge pe;
2024   tree new_temp;
2025   basic_block new_bb;
2026
2027   new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2028   add_referenced_tmp_var (new_var);
2029
2030   init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2031   new_temp = make_ssa_name (new_var, init_stmt);
2032   TREE_OPERAND (init_stmt, 0) = new_temp;
2033
2034   pe = loop_preheader_edge (loop);
2035   new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2036   gcc_assert (!new_bb);
2037
2038   if (vect_debug_details (NULL))
2039     {
2040       fprintf (dump_file, "created new init_stmt: ");
2041       print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2042     }
2043
2044   vec_oprnd = TREE_OPERAND (init_stmt, 0);
2045   return vec_oprnd;
2046 }
2047
2048
2049 /* Function vect_get_vec_def_for_operand.
2050
2051    OP is an operand in STMT. This function returns a (vector) def that will be
2052    used in the vectorized stmt for STMT.
2053
2054    In the case that OP is an SSA_NAME which is defined in the loop, then
2055    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2056
2057    In case OP is an invariant or constant, a new stmt that creates a vector def
2058    needs to be introduced.  */
2059
2060 static tree
2061 vect_get_vec_def_for_operand (tree op, tree stmt)
2062 {
2063   tree vec_oprnd;
2064   tree vec_stmt;
2065   tree def_stmt;
2066   stmt_vec_info def_stmt_info = NULL;
2067   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2068   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2069   int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2070   struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2071   basic_block bb;
2072   tree vec_inv;
2073   tree t = NULL_TREE;
2074   tree def;
2075   int i;
2076
2077   if (vect_debug_details (NULL))
2078     {
2079       fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2080       print_generic_expr (dump_file, op, TDF_SLIM);
2081     }
2082
2083   /** ===> Case 1: operand is a constant.  **/
2084
2085   if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2086     {
2087       /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
2088
2089       tree vec_cst;
2090
2091       /* Build a tree with vector elements.  */
2092       if (vect_debug_details (NULL))
2093         fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2094
2095       for (i = nunits - 1; i >= 0; --i)
2096         {
2097           t = tree_cons (NULL_TREE, op, t);
2098         }
2099       vec_cst = build_vector (vectype, t);
2100       return vect_init_vector (stmt, vec_cst);
2101     }
2102
2103   gcc_assert (TREE_CODE (op) == SSA_NAME);
2104
2105   /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it.  **/
2106
2107   def_stmt = SSA_NAME_DEF_STMT (op);
2108   def_stmt_info = vinfo_for_stmt (def_stmt);
2109
2110   if (vect_debug_details (NULL))
2111     {
2112       fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2113       print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2114     }
2115
2116
2117   /** ==> Case 2.1: operand is defined inside the loop.  **/
2118
2119   if (def_stmt_info)
2120     {
2121       /* Get the def from the vectorized stmt.  */
2122
2123       vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2124       gcc_assert (vec_stmt);
2125       vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2126       return vec_oprnd;
2127     }
2128
2129
2130   /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2131                     it is a reduction/induction.  **/
2132
2133   bb = bb_for_stmt (def_stmt);
2134   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2135     {
2136       if (vect_debug_details (NULL))
2137         fprintf (dump_file, "reduction/induction - unsupported.");
2138       internal_error ("no support for reduction/induction"); /* FORNOW */
2139     }
2140
2141
2142   /** ==> Case 2.3: operand is defined outside the loop -
2143                     it is a loop invariant.  */
2144
2145   switch (TREE_CODE (def_stmt))
2146     {
2147     case PHI_NODE:
2148       def = PHI_RESULT (def_stmt);
2149       break;
2150     case MODIFY_EXPR:
2151       def = TREE_OPERAND (def_stmt, 0);
2152       break;
2153     case NOP_EXPR:
2154       def = TREE_OPERAND (def_stmt, 0);
2155       gcc_assert (IS_EMPTY_STMT (def_stmt));
2156       def = op;
2157       break;
2158     default:
2159       if (vect_debug_details (NULL))
2160         {
2161           fprintf (dump_file, "unsupported defining stmt: ");
2162           print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2163         }
2164       internal_error ("unsupported defining stmt");
2165     }
2166
2167   /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}'  */
2168
2169   if (vect_debug_details (NULL))
2170     fprintf (dump_file, "Create vector_inv.");
2171
2172   for (i = nunits - 1; i >= 0; --i)
2173     {
2174       t = tree_cons (NULL_TREE, def, t);
2175     }
2176
2177   vec_inv = build_constructor (vectype, t);
2178   return vect_init_vector (stmt, vec_inv);
2179 }
2180
2181
2182 /* Function vect_finish_stmt_generation.
2183
2184    Insert a new stmt.  */
2185
2186 static void
2187 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2188 {
2189   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2190
2191   if (vect_debug_details (NULL))
2192     {
2193       fprintf (dump_file, "add new stmt: ");
2194       print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2195     }
2196
2197   /* Make sure bsi points to the stmt that is being vectorized.  */
2198
2199   /* Assumption: any stmts created for the vectorization of stmt S were
2200      inserted before S. BSI is expected to point to S or some new stmt before S.
2201    */
2202
2203   while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2204     bsi_next (bsi);
2205   gcc_assert (stmt == bsi_stmt (*bsi));
2206 }
2207
2208
2209 /* Function vectorizable_assignment.
2210
2211    Check if STMT performs an assignment (copy) that can be vectorized.
2212    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2213    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2214    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2215
2216 static bool
2217 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2218 {
2219   tree vec_dest;
2220   tree scalar_dest;
2221   tree op;
2222   tree vec_oprnd;
2223   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2224   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2225   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2226   tree new_temp;
2227
2228   /* Is vectorizable assignment?  */
2229
2230   if (TREE_CODE (stmt) != MODIFY_EXPR)
2231     return false;
2232
2233   scalar_dest = TREE_OPERAND (stmt, 0);
2234   if (TREE_CODE (scalar_dest) != SSA_NAME)
2235     return false;
2236
2237   op = TREE_OPERAND (stmt, 1);
2238   if (!vect_is_simple_use (op, loop, NULL))
2239     {
2240       if (vect_debug_details (NULL))
2241         fprintf (dump_file, "use not simple.");
2242       return false;
2243     }
2244
2245   if (!vec_stmt) /* transformation not required.  */
2246     {
2247       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2248       return true;
2249     }
2250
2251   /** Trasform.  **/
2252   if (vect_debug_details (NULL))
2253     fprintf (dump_file, "transform assignment.");
2254
2255   /* Handle def.  */
2256   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2257
2258   /* Handle use.  */
2259   op = TREE_OPERAND (stmt, 1);
2260   vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2261
2262   /* Arguments are ready. create the new vector stmt.  */
2263   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2264   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2265   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2266   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2267
2268   return true;
2269 }
2270
2271
2272 /* Function vectorizable_operation.
2273
2274    Check if STMT performs a binary or unary operation that can be vectorized.
2275    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2276    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2277    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2278
2279 static bool
2280 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2281 {
2282   tree vec_dest;
2283   tree scalar_dest;
2284   tree operation;
2285   tree op0, op1 = NULL;
2286   tree vec_oprnd0, vec_oprnd1=NULL;
2287   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2288   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2289   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2290   int i;
2291   enum tree_code code;
2292   enum machine_mode vec_mode;
2293   tree new_temp;
2294   int op_type;
2295   tree op;
2296   optab optab;
2297
2298   /* Is STMT a vectorizable binary/unary operation?   */
2299   if (TREE_CODE (stmt) != MODIFY_EXPR)
2300     return false;
2301
2302   if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2303     return false;
2304
2305   operation = TREE_OPERAND (stmt, 1);
2306   code = TREE_CODE (operation);
2307   optab = optab_for_tree_code (code, vectype);
2308
2309   /* Support only unary or binary operations.  */
2310   op_type = TREE_CODE_LENGTH (code);
2311   if (op_type != unary_op && op_type != binary_op)
2312     {
2313       if (vect_debug_details (NULL))
2314         fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2315       return false;
2316     }
2317
2318   for (i = 0; i < op_type; i++)
2319     {
2320       op = TREE_OPERAND (operation, i);
2321       if (!vect_is_simple_use (op, loop, NULL))
2322         {
2323           if (vect_debug_details (NULL))
2324             fprintf (dump_file, "use not simple.");
2325           return false;
2326         }
2327     }
2328
2329   /* Supportable by target?  */
2330   if (!optab)
2331     {
2332       if (vect_debug_details (NULL))
2333         fprintf (dump_file, "no optab.");
2334       return false;
2335     }
2336   vec_mode = TYPE_MODE (vectype);
2337   if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2338     {
2339       if (vect_debug_details (NULL))
2340         fprintf (dump_file, "op not supported by target.");
2341       return false;
2342     }
2343
2344   if (!vec_stmt) /* transformation not required.  */
2345     {
2346       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2347       return true;
2348     }
2349
2350   /** Transform.  **/
2351
2352   if (vect_debug_details (NULL))
2353     fprintf (dump_file, "transform binary/unary operation.");
2354
2355   /* Handle def.  */
2356   scalar_dest = TREE_OPERAND (stmt, 0);
2357   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2358
2359   /* Handle uses.  */
2360   op0 = TREE_OPERAND (operation, 0);
2361   vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2362
2363   if (op_type == binary_op)
2364     {
2365       op1 = TREE_OPERAND (operation, 1);
2366       vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2367     }
2368
2369   /* Arguments are ready. create the new vector stmt.  */
2370
2371   if (op_type == binary_op)
2372     *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2373                 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2374   else
2375     *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2376                 build1 (code, vectype, vec_oprnd0));
2377   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2378   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2379   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2380
2381   return true;
2382 }
2383
2384
2385 /* Function vectorizable_store.
2386
2387    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2388    can be vectorized.
2389    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2390    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2391    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2392
2393 static bool
2394 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2395 {
2396   tree scalar_dest;
2397   tree data_ref;
2398   tree op;
2399   tree vec_oprnd1;
2400   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2401   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2402   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2403   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2404   enum machine_mode vec_mode;
2405   tree dummy;
2406   enum dr_alignment_support alignment_support_cheme;
2407
2408   /* Is vectorizable store? */
2409
2410   if (TREE_CODE (stmt) != MODIFY_EXPR)
2411     return false;
2412
2413   scalar_dest = TREE_OPERAND (stmt, 0);
2414   if (TREE_CODE (scalar_dest) != ARRAY_REF
2415       && TREE_CODE (scalar_dest) != INDIRECT_REF)
2416     return false;
2417
2418   op = TREE_OPERAND (stmt, 1);
2419   if (!vect_is_simple_use (op, loop, NULL))
2420     {
2421       if (vect_debug_details (NULL))
2422         fprintf (dump_file, "use not simple.");
2423       return false;
2424     }
2425
2426   vec_mode = TYPE_MODE (vectype);
2427   /* FORNOW. In some cases can vectorize even if data-type not supported
2428      (e.g. - array initialization with 0).  */
2429   if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2430     return false;
2431
2432   if (!STMT_VINFO_DATA_REF (stmt_info))
2433     return false;
2434
2435
2436   if (!vec_stmt) /* transformation not required.  */
2437     {
2438       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2439       return true;
2440     }
2441
2442   /** Trasform.  **/
2443
2444   if (vect_debug_details (NULL))
2445     fprintf (dump_file, "transform store");
2446
2447   alignment_support_cheme = vect_supportable_dr_alignment (dr);
2448   gcc_assert (alignment_support_cheme);
2449   gcc_assert (alignment_support_cheme = dr_aligned);  /* FORNOW */
2450
2451   /* Handle use - get the vectorized def from the defining stmt.  */
2452   vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2453
2454   /* Handle def.  */
2455   /* FORNOW: make sure the data reference is aligned.  */
2456   vect_align_data_ref (stmt);
2457   data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2458   data_ref = build_fold_indirect_ref (data_ref);
2459
2460   /* Arguments are ready. create the new vector stmt.  */
2461   *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2462   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2463
2464   return true;
2465 }
2466
2467
2468 /* vectorizable_load.
2469
2470    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2471    can be vectorized.
2472    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2473    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2474    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2475
2476 static bool
2477 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2478 {
2479   tree scalar_dest;
2480   tree vec_dest = NULL;
2481   tree data_ref = NULL;
2482   tree op;
2483   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2484   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2485   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2486   tree new_temp;
2487   int mode;
2488   tree init_addr;
2489   tree new_stmt;
2490   tree dummy;
2491   basic_block new_bb;
2492   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2493   edge pe = loop_preheader_edge (loop);
2494   enum dr_alignment_support alignment_support_cheme;
2495
2496   /* Is vectorizable load? */
2497
2498   if (TREE_CODE (stmt) != MODIFY_EXPR)
2499     return false;
2500
2501   scalar_dest = TREE_OPERAND (stmt, 0);
2502   if (TREE_CODE (scalar_dest) != SSA_NAME)
2503     return false;
2504
2505   op = TREE_OPERAND (stmt, 1);
2506   if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2507     return false;
2508
2509   if (!STMT_VINFO_DATA_REF (stmt_info))
2510     return false;
2511
2512   mode = (int) TYPE_MODE (vectype);
2513
2514   /* FORNOW. In some cases can vectorize even if data-type not supported
2515     (e.g. - data copies).  */
2516   if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2517     {
2518       if (vect_debug_details (loop))
2519         fprintf (dump_file, "Aligned load, but unsupported type.");
2520       return false;
2521     }
2522
2523   if (!vec_stmt) /* transformation not required.  */
2524     {
2525       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2526       return true;
2527     }
2528
2529   /** Trasform.  **/
2530
2531   if (vect_debug_details (NULL))
2532     fprintf (dump_file, "transform load.");
2533
2534   alignment_support_cheme = vect_supportable_dr_alignment (dr);
2535   gcc_assert (alignment_support_cheme);
2536
2537   if (alignment_support_cheme == dr_aligned
2538       || alignment_support_cheme == dr_unaligned_supported)
2539     {
2540       /* Create:
2541          p = initial_addr;
2542          indx = 0;
2543          loop {
2544            vec_dest = *(p);
2545            indx = indx + 1;
2546          }
2547       */
2548
2549       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2550       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2551       if (aligned_access_p (dr))
2552         data_ref = build_fold_indirect_ref (data_ref);
2553       else
2554         {
2555           int mis = DR_MISALIGNMENT (dr);
2556           tree tmis = (mis == -1 ?
2557                        integer_zero_node :
2558                        build_int_cst (integer_type_node, mis));
2559           tmis = int_const_binop (MULT_EXPR, tmis,
2560                         build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2561           data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2562         }
2563       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2564       new_temp = make_ssa_name (vec_dest, new_stmt);
2565       TREE_OPERAND (new_stmt, 0) = new_temp;
2566       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2567     }
2568   else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2569     {
2570       /* Create:
2571          p1 = initial_addr;
2572          msq_init = *(floor(p1))
2573          p2 = initial_addr + VS - 1;
2574          magic = have_builtin ? builtin_result : initial_address;
2575          indx = 0;
2576          loop {
2577            p2' = p2 + indx * vectype_size
2578            lsq = *(floor(p2'))
2579            vec_dest = realign_load (msq, lsq, magic)
2580            indx = indx + 1;
2581            msq = lsq;
2582          }
2583       */
2584
2585       tree offset;
2586       tree magic;
2587       tree phi_stmt;
2588       tree msq_init;
2589       tree msq, lsq;
2590       tree dataref_ptr;
2591       tree params;
2592
2593       /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
2594       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2595       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2596                                            &init_addr, true);
2597       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2598       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2599       new_temp = make_ssa_name (vec_dest, new_stmt);
2600       TREE_OPERAND (new_stmt, 0) = new_temp;
2601       new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2602       gcc_assert (!new_bb);
2603       msq_init = TREE_OPERAND (new_stmt, 0);
2604
2605
2606       /* <2> Create lsq = *(floor(p2')) in the loop  */
2607       offset = build_int_cst (integer_type_node,
2608                               GET_MODE_NUNITS (TYPE_MODE (vectype)));
2609       offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2610       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2611       dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2612       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2613       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2614       new_temp = make_ssa_name (vec_dest, new_stmt);
2615       TREE_OPERAND (new_stmt, 0) = new_temp;
2616       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2617       lsq = TREE_OPERAND (new_stmt, 0);
2618
2619
2620       /* <3> */
2621       if (targetm.vectorize.builtin_mask_for_load)
2622         {
2623           /* Create permutation mask, if required, in loop preheader.  */
2624           tree builtin_decl;
2625           params = build_tree_list (NULL_TREE, init_addr);
2626           vec_dest = vect_create_destination_var (scalar_dest, vectype);
2627           builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2628           new_stmt = build_function_call_expr (builtin_decl, params);
2629           new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2630           new_temp = make_ssa_name (vec_dest, new_stmt);
2631           TREE_OPERAND (new_stmt, 0) = new_temp;
2632           new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2633           gcc_assert (!new_bb);
2634           magic = TREE_OPERAND (new_stmt, 0);
2635         }
2636       else
2637         {
2638           /* Use current address instead of init_addr for reduced reg pressure.
2639            */
2640           magic = dataref_ptr;
2641         }
2642
2643
2644       /* <4> Create msq = phi <msq_init, lsq> in loop  */
2645       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2646       msq = make_ssa_name (vec_dest, NULL_TREE);
2647       phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2648       SSA_NAME_DEF_STMT (msq) = phi_stmt;
2649       add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2650       add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2651
2652
2653       /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
2654       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2655       new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2656       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2657       new_temp = make_ssa_name (vec_dest, new_stmt);
2658       TREE_OPERAND (new_stmt, 0) = new_temp;
2659       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2660     }
2661   else
2662     gcc_unreachable ();
2663
2664   *vec_stmt = new_stmt;
2665   return true;
2666 }
2667
2668
2669 /* Function vect_supportable_dr_alignment
2670
2671    Return whether the data reference DR is supported with respect to its
2672    alignment.  */
2673
2674 static enum dr_alignment_support
2675 vect_supportable_dr_alignment (struct data_reference *dr)
2676 {
2677   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2678   enum machine_mode mode = (int) TYPE_MODE (vectype);
2679
2680   if (aligned_access_p (dr))
2681     return dr_aligned;
2682
2683   /* Possibly unaligned access.  */
2684
2685   if (DR_IS_READ (dr))
2686     {
2687       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2688           && (!targetm.vectorize.builtin_mask_for_load
2689               || targetm.vectorize.builtin_mask_for_load ()))
2690         return dr_unaligned_software_pipeline;
2691
2692       if (targetm.vectorize.misaligned_mem_ok (mode))
2693         /* Can't software pipeline the loads.  */
2694         return dr_unaligned_supported;
2695     }
2696
2697   /* Unsupported.  */
2698   return dr_unaligned_unsupported;
2699 }
2700
2701
2702 /* Function vect_transform_stmt.
2703
2704    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
2705
2706 static bool
2707 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2708 {
2709   bool is_store = false;
2710   tree vec_stmt = NULL_TREE;
2711   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2712   bool done;
2713
2714   switch (STMT_VINFO_TYPE (stmt_info))
2715     {
2716     case op_vec_info_type:
2717       done = vectorizable_operation (stmt, bsi, &vec_stmt);
2718       gcc_assert (done);
2719       break;
2720
2721     case assignment_vec_info_type:
2722       done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2723       gcc_assert (done);
2724       break;
2725
2726     case load_vec_info_type:
2727       done = vectorizable_load (stmt, bsi, &vec_stmt);
2728       gcc_assert (done);
2729       break;
2730
2731     case store_vec_info_type:
2732       done = vectorizable_store (stmt, bsi, &vec_stmt);
2733       gcc_assert (done);
2734       is_store = true;
2735       break;
2736     default:
2737       if (vect_debug_details (NULL))
2738         fprintf (dump_file, "stmt not supported.");
2739       gcc_unreachable ();
2740     }
2741
2742   STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2743
2744   return is_store;
2745 }
2746
2747
2748 /* This function builds ni_name = number of iterations loop executes
2749    on the loop preheader.  */
2750
2751 static tree
2752 vect_build_loop_niters (loop_vec_info loop_vinfo)
2753 {
2754   tree ni_name, stmt, var;
2755   edge pe;
2756   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2757   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2758
2759   var = create_tmp_var (TREE_TYPE (ni), "niters");
2760   add_referenced_tmp_var (var);
2761   ni_name = force_gimple_operand (ni, &stmt, false, var);
2762
2763   pe = loop_preheader_edge (loop);
2764   if (stmt)
2765     {
2766       basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2767       gcc_assert (!new_bb);
2768     }
2769
2770   return ni_name;
2771 }
2772
2773
2774 /* This function generates the following statements:
2775
2776  ni_name = number of iterations loop executes
2777  ratio = ni_name / vf
2778  ratio_mult_vf_name = ratio * vf
2779
2780  and places them at the loop preheader edge.  */
2781
2782 static void
2783 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2784                                  tree *ni_name_ptr,
2785                                  tree *ratio_mult_vf_name_ptr,
2786                                  tree *ratio_name_ptr)
2787 {
2788
2789   edge pe;
2790   basic_block new_bb;
2791   tree stmt, ni_name;
2792   tree var;
2793   tree ratio_name;
2794   tree ratio_mult_vf_name;
2795   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2796   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2797   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2798   tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
2799
2800   pe = loop_preheader_edge (loop);
2801
2802   /* Generate temporary variable that contains
2803      number of iterations loop executes.  */
2804
2805   ni_name = vect_build_loop_niters (loop_vinfo);
2806
2807   /* Create: ratio = ni >> log2(vf) */
2808
2809   var = create_tmp_var (TREE_TYPE (ni), "bnd");
2810   add_referenced_tmp_var (var);
2811   ratio_name = make_ssa_name (var, NULL_TREE);
2812   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2813            build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2814   SSA_NAME_DEF_STMT (ratio_name) = stmt;
2815
2816   pe = loop_preheader_edge (loop);
2817   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2818   gcc_assert (!new_bb);
2819
2820   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
2821
2822   var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2823   add_referenced_tmp_var (var);
2824   ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2825   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2826            build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2827   SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2828
2829   pe = loop_preheader_edge (loop);
2830   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2831   gcc_assert (!new_bb);
2832
2833   *ni_name_ptr = ni_name;
2834   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2835   *ratio_name_ptr = ratio_name;
2836
2837   return;
2838 }
2839
2840
2841 /*   Function vect_update_ivs_after_vectorizer.
2842
2843      "Advance" the induction variables of LOOP to the value they should take
2844      after the execution of LOOP.  This is currently necessary because the
2845      vectorizer does not handle induction variables that are used after the
2846      loop.  Such a situation occurs when the last iterations of LOOP are
2847      peeled, because:
2848      1. We introduced new uses after LOOP for IVs that were not originally used
2849         after LOOP: the IVs of LOOP are now used by an epilog loop.
2850      2. LOOP is going to be vectorized; this means that it will iterate N/VF
2851         times, whereas the loop IVs should be bumped N times.
2852
2853      Input:
2854      - LOOP - a loop that is going to be vectorized. The last few iterations
2855               of LOOP were peeled.
2856      - NITERS - the number of iterations that LOOP executes (before it is
2857                 vectorized). i.e, the number of times the ivs should be bumped.
2858      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2859                   coming out from LOOP on which there are uses of the LOOP ivs
2860                   (this is the path from LOOP->exit to epilog_loop->preheader).
2861
2862                   The new definitions of the ivs are placed in LOOP->exit.
2863                   The phi args associated with the edge UPDATE_E in the bb
2864                   UPDATE_E->dest are updated accordingly.
2865
2866      Assumption 1: Like the rest of the vectorizer, this function assumes
2867      a single loop exit that has a single predecessor.
2868
2869      Assumption 2: The phi nodes in the LOOP header and in update_bb are
2870      organized in the same order.
2871
2872      Assumption 3: The access function of the ivs is simple enough (see
2873      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
2874
2875      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2876      coming out of LOOP on which the ivs of LOOP are used (this is the path
2877      that leads to the epilog loop; other paths skip the epilog loop).  This
2878      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2879      needs to have its phis updated.
2880  */
2881
2882 static void
2883 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
2884 {
2885   basic_block exit_bb = loop->exit_edges[0]->dest;
2886   tree phi, phi1;
2887   basic_block update_bb = update_e->dest;
2888
2889   /* gcc_assert (vect_can_advance_ivs_p (loop)); */
2890
2891   /* Make sure there exists a single-predecessor exit bb:  */
2892   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
2893
2894   for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2895        phi && phi1;
2896        phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2897     {
2898       tree access_fn = NULL;
2899       tree evolution_part;
2900       tree init_expr;
2901       tree step_expr;
2902       tree var, stmt, ni, ni_name;
2903       block_stmt_iterator last_bsi;
2904
2905       /* Skip virtual phi's.  */
2906       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2907         {
2908           if (vect_debug_details (NULL))
2909             fprintf (dump_file, "virtual phi. skip.");
2910           continue;
2911         }
2912
2913       access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2914       gcc_assert (access_fn);
2915       evolution_part =
2916          unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2917       gcc_assert (evolution_part != NULL_TREE);
2918
2919       /* FORNOW: We do not support IVs whose evolution function is a polynomial
2920          of degree >= 2 or exponential.  */
2921       gcc_assert (!tree_is_chrec (evolution_part));
2922
2923       step_expr = evolution_part;
2924       init_expr = unshare_expr (initial_condition (access_fn));
2925
2926       ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2927                   build2 (MULT_EXPR, TREE_TYPE (niters),
2928                        niters, step_expr), init_expr);
2929
2930       var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2931       add_referenced_tmp_var (var);
2932
2933       ni_name = force_gimple_operand (ni, &stmt, false, var);
2934
2935       /* Insert stmt into exit_bb.  */
2936       last_bsi = bsi_last (exit_bb);
2937       if (stmt)
2938         bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2939
2940       /* Fix phi expressions in the successor bb.  */
2941       gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
2942                   PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
2943       SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
2944     }
2945 }
2946
2947
2948 /* Function vect_do_peeling_for_loop_bound
2949
2950    Peel the last iterations of the loop represented by LOOP_VINFO.
2951    The peeled iterations form a new epilog loop.  Given that the loop now
2952    iterates NITERS times, the new epilog loop iterates
2953    NITERS % VECTORIZATION_FACTOR times.
2954
2955    The original loop will later be made to iterate
2956    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
2957
2958 static void
2959 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2960                                 struct loops *loops)
2961 {
2962
2963   tree ni_name, ratio_mult_vf_name;
2964   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2965   struct loop *new_loop;
2966   edge update_e;
2967 #ifdef ENABLE_CHECKING
2968   int loop_num;
2969 #endif
2970
2971   if (vect_debug_details (NULL))
2972     fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
2973
2974   /* Generate the following variables on the preheader of original loop:
2975
2976      ni_name = number of iteration the original loop executes
2977      ratio = ni_name / vf
2978      ratio_mult_vf_name = ratio * vf  */
2979   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2980                                    &ratio_mult_vf_name, ratio);
2981
2982   /* Update loop info.  */
2983   loop->pre_header = loop_preheader_edge (loop)->src;
2984   loop->pre_header_edges[0] = loop_preheader_edge (loop);
2985
2986 #ifdef ENABLE_CHECKING
2987   loop_num  = loop->num;
2988 #endif
2989   new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
2990                                             ratio_mult_vf_name, ni_name, false);
2991 #ifdef ENABLE_CHECKING
2992   gcc_assert (new_loop);
2993   gcc_assert (loop_num == loop->num);
2994   slpeel_verify_cfg_after_peeling (loop, new_loop);
2995 #endif
2996
2997   /* A guard that controls whether the new_loop is to be executed or skipped
2998      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
2999      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
3000      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
3001      is on the path where the LOOP IVs are used and need to be updated.  */
3002
3003   if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3004     update_e = EDGE_PRED (new_loop->pre_header, 0);
3005   else
3006     update_e = EDGE_PRED (new_loop->pre_header, 1);
3007
3008   /* Update IVs of original loop as if they were advanced
3009      by ratio_mult_vf_name steps.  */
3010   vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3011
3012   /* After peeling we have to reset scalar evolution analyzer.  */
3013   scev_reset ();
3014
3015   return;
3016 }
3017
3018
3019 /* Function vect_gen_niters_for_prolog_loop
3020
3021    Set the number of iterations for the loop represented by LOOP_VINFO
3022    to the minimum between LOOP_NITERS (the original iteration count of the loop)
3023    and the misalignment of DR - the first data reference recorded in
3024    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
3025    this loop, the data reference DR will refer to an aligned location.
3026
3027    The following computation is generated:
3028
3029    compute address misalignment in bytes:
3030    addr_mis = addr & (vectype_size - 1)
3031
3032    prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3033
3034    (elem_size = element type size; an element is the scalar element
3035         whose type is the inner type of the vectype)  */
3036
3037 static tree
3038 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3039 {
3040   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3041   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3042   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3043   tree var, stmt;
3044   tree iters, iters_name;
3045   edge pe;
3046   basic_block new_bb;
3047   tree dr_stmt = DR_STMT (dr);
3048   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3049   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3050   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3051   tree elem_misalign;
3052   tree byte_misalign;
3053   tree new_stmts = NULL_TREE;
3054   tree start_addr =
3055         vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3056   tree ptr_type = TREE_TYPE (start_addr);
3057   tree size = TYPE_SIZE (ptr_type);
3058   tree type = lang_hooks.types.type_for_size (TREE_INT_CST_LOW (size), 1);
3059   tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3060   tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3061   tree niters_type = TREE_TYPE (loop_niters);
3062   tree elem_size_log =
3063         build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3064   tree vf_tree = build_int_cst (unsigned_type_node, vf);
3065
3066   pe = loop_preheader_edge (loop);
3067   new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3068   gcc_assert (!new_bb);
3069
3070   /* Create:  byte_misalign = addr & (vectype_size - 1)  */
3071   byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3072
3073   /* Create:  elem_misalign = byte_misalign / element_size  */
3074   elem_misalign =
3075         build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3076
3077   /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
3078   iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3079   iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3080   iters = fold_convert (niters_type, iters);
3081
3082   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
3083   /* If the loop bound is known at compile time we already verified that it is
3084      greater than vf; since the misalignment ('iters') is at most vf, there's
3085      no need to generate the MIN_EXPR in this case.  */
3086   if (!host_integerp (loop_niters, 0))
3087     iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3088
3089   var = create_tmp_var (niters_type, "prolog_loop_niters");
3090   add_referenced_tmp_var (var);
3091   iters_name = force_gimple_operand (iters, &stmt, false, var);
3092
3093   /* Insert stmt on loop preheader edge.  */
3094   pe = loop_preheader_edge (loop);
3095   if (stmt)
3096     {
3097       basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3098       gcc_assert (!new_bb);
3099     }
3100
3101   return iters_name;
3102 }
3103
3104
3105 /* Function vect_update_inits_of_dr
3106
3107    NITERS iterations were peeled from LOOP.  DR represents a data reference
3108    in LOOP.  This function updates the information recorded in DR to
3109    account for the fact that the first NITERS iterations had already been
3110    executed.  Specifically, it updates the initial_condition of the
3111    access_function of DR.  */
3112
3113 static void
3114 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3115                          tree niters)
3116 {
3117   tree access_fn = DR_ACCESS_FN (dr, 0);
3118   tree init, init_new, step;
3119
3120   step = evolution_part_in_loop_num (access_fn, loop->num);
3121   init = initial_condition (access_fn);
3122
3123   init_new = build2 (PLUS_EXPR, TREE_TYPE (init),
3124                   build2 (MULT_EXPR, TREE_TYPE (niters),
3125                          niters, step), init);
3126   DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3127
3128   return;
3129 }
3130
3131
3132 /* Function vect_update_inits_of_drs
3133
3134    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3135    This function updates the information recorded for the data references in
3136    the loop to account for the fact that the first NITERS iterations had
3137    already been executed.  Specifically, it updates the initial_condition of the
3138    access_function of all the data_references in the loop.  */
3139
3140 static void
3141 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3142 {
3143   unsigned int i;
3144   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3145   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3146   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3147
3148   if (dump_file && (dump_flags & TDF_DETAILS))
3149     fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3150
3151   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3152     {
3153       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3154       vect_update_inits_of_dr (dr, loop, niters);
3155     }
3156
3157   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3158     {
3159       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3160       vect_update_inits_of_dr (dr, loop, niters);
3161     }
3162 }
3163
3164
3165 /* Function vect_do_peeling_for_alignment
3166
3167    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3168    'niters' is set to the misalignment of one of the data references in the
3169    loop, thereby forcing it to refer to an aligned location at the beginning
3170    of the execution of this loop.  The data reference for which we are
3171    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
3172
3173 static void
3174 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3175 {
3176   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3177   tree niters_of_prolog_loop, ni_name;
3178   tree n_iters;
3179   struct loop *new_loop;
3180
3181   if (vect_debug_details (NULL))
3182     fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3183
3184   ni_name = vect_build_loop_niters (loop_vinfo);
3185   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3186
3187   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
3188   new_loop =
3189         slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3190                                        niters_of_prolog_loop, ni_name, true);
3191 #ifdef ENABLE_CHECKING
3192   gcc_assert (new_loop);
3193   slpeel_verify_cfg_after_peeling (new_loop, loop);
3194 #endif
3195
3196   /* Update number of times loop executes.  */
3197   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3198   LOOP_VINFO_NITERS (loop_vinfo) =
3199     build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3200
3201   /* Update the init conditions of the access functions of all data refs.  */
3202   vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3203
3204   /* After peeling we have to reset scalar evolution analyzer.  */
3205   scev_reset ();
3206
3207   return;
3208 }
3209
3210
3211 /* Function vect_transform_loop.
3212
3213    The analysis phase has determined that the loop is vectorizable.
3214    Vectorize the loop - created vectorized stmts to replace the scalar
3215    stmts in the loop, and update the loop exit condition.  */
3216
3217 static void
3218 vect_transform_loop (loop_vec_info loop_vinfo,
3219                      struct loops *loops ATTRIBUTE_UNUSED)
3220 {
3221   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3222   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3223   int nbbs = loop->num_nodes;
3224   block_stmt_iterator si;
3225   int i;
3226   tree ratio = NULL;
3227   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3228
3229   if (vect_debug_details (NULL))
3230     fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3231
3232
3233   /* Peel the loop if there are data refs with unknown alignment.
3234      Only one data ref with unknown store is allowed.  */
3235
3236   if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3237     vect_do_peeling_for_alignment (loop_vinfo, loops);
3238
3239   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3240      compile time constant), or it is a constant that doesn't divide by the
3241      vectorization factor, then an epilog loop needs to be created.
3242      We therefore duplicate the loop: the original loop will be vectorized,
3243      and will compute the first (n/VF) iterations. The second copy of the loop
3244      will remain scalar and will compute the remaining (n%VF) iterations.
3245      (VF is the vectorization factor).  */
3246
3247   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3248       || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3249           && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3250     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3251   else
3252     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3253                 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3254
3255   /* 1) Make sure the loop header has exactly two entries
3256      2) Make sure we have a preheader basic block.  */
3257
3258   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3259
3260   loop_split_edge_with (loop_preheader_edge (loop), NULL);
3261
3262
3263   /* FORNOW: the vectorizer supports only loops which body consist
3264      of one basic block (header + empty latch). When the vectorizer will
3265      support more involved loop forms, the order by which the BBs are
3266      traversed need to be reconsidered.  */
3267
3268   for (i = 0; i < nbbs; i++)
3269     {
3270       basic_block bb = bbs[i];
3271
3272       for (si = bsi_start (bb); !bsi_end_p (si);)
3273         {
3274           tree stmt = bsi_stmt (si);
3275           stmt_vec_info stmt_info;
3276           bool is_store;
3277
3278           if (vect_debug_details (NULL))
3279             {
3280               fprintf (dump_file, "------>vectorizing statement: ");
3281               print_generic_expr (dump_file, stmt, TDF_SLIM);
3282             }
3283           stmt_info = vinfo_for_stmt (stmt);
3284           gcc_assert (stmt_info);
3285           if (!STMT_VINFO_RELEVANT_P (stmt_info))
3286             {
3287               bsi_next (&si);
3288               continue;
3289             }
3290 #ifdef ENABLE_CHECKING
3291           /* FORNOW: Verify that all stmts operate on the same number of
3292                      units and no inner unrolling is necessary.  */
3293           gcc_assert
3294                 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3295                  == vectorization_factor);
3296 #endif
3297           /* -------- vectorize statement ------------ */
3298           if (vect_debug_details (NULL))
3299             fprintf (dump_file, "transform statement.");
3300
3301           is_store = vect_transform_stmt (stmt, &si);
3302           if (is_store)
3303             {
3304               /* free the attached stmt_vec_info and remove the stmt.  */
3305               stmt_ann_t ann = stmt_ann (stmt);
3306               free (stmt_info);
3307               set_stmt_info (ann, NULL);
3308               bsi_remove (&si);
3309               continue;
3310             }
3311
3312           bsi_next (&si);
3313         }                       /* stmts in BB */
3314     }                           /* BBs in loop */
3315
3316   slpeel_make_loop_iterate_ntimes (loop, ratio);
3317
3318   if (vect_debug_details (loop))
3319     fprintf (dump_file,"Success! loop vectorized.");
3320   if (vect_debug_stats (loop))
3321     fprintf (dump_file, "LOOP VECTORIZED.");
3322 }
3323
3324
3325 /* Function vect_is_simple_use.
3326
3327    Input:
3328    LOOP - the loop that is being vectorized.
3329    OPERAND - operand of a stmt in LOOP.
3330    DEF - the defining stmt in case OPERAND is an SSA_NAME.
3331
3332    Returns whether a stmt with OPERAND can be vectorized.
3333    Supportable operands are constants, loop invariants, and operands that are
3334    defined by the current iteration of the loop. Unsupportable operands are
3335    those that are defined by a previous iteration of the loop (as is the case
3336    in reduction/induction computations).  */
3337
3338 static bool
3339 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3340 {
3341   tree def_stmt;
3342   basic_block bb;
3343
3344   if (def)
3345     *def = NULL_TREE;
3346
3347   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3348     return true;
3349
3350   if (TREE_CODE (operand) != SSA_NAME)
3351     return false;
3352
3353   def_stmt = SSA_NAME_DEF_STMT (operand);
3354   if (def_stmt == NULL_TREE )
3355     {
3356       if (vect_debug_details (NULL))
3357         fprintf (dump_file, "no def_stmt.");
3358       return false;
3359     }
3360
3361   /* empty stmt is expected only in case of a function argument.
3362      (Otherwise - we expect a phi_node or a modify_expr).  */
3363   if (IS_EMPTY_STMT (def_stmt))
3364     {
3365       tree arg = TREE_OPERAND (def_stmt, 0);
3366       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3367         return true;
3368       if (vect_debug_details (NULL))
3369         {
3370           fprintf (dump_file, "Unexpected empty stmt: ");
3371           print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3372         }
3373       return false;
3374     }
3375
3376   /* phi_node inside the loop indicates an induction/reduction pattern.
3377      This is not supported yet.  */
3378   bb = bb_for_stmt (def_stmt);
3379   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3380     {
3381       if (vect_debug_details (NULL))
3382         fprintf (dump_file, "reduction/induction - unsupported.");
3383       return false; /* FORNOW: not supported yet.  */
3384     }
3385
3386   /* Expecting a modify_expr or a phi_node.  */
3387   if (TREE_CODE (def_stmt) == MODIFY_EXPR
3388       || TREE_CODE (def_stmt) == PHI_NODE)
3389     {
3390       if (def)
3391         *def = def_stmt;
3392       return true;
3393     }
3394
3395   return false;
3396 }
3397
3398
3399 /* Function vect_analyze_operations.
3400
3401    Scan the loop stmts and make sure they are all vectorizable.  */
3402
3403 static bool
3404 vect_analyze_operations (loop_vec_info loop_vinfo)
3405 {
3406   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3407   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3408   int nbbs = loop->num_nodes;
3409   block_stmt_iterator si;
3410   unsigned int vectorization_factor = 0;
3411   int i;
3412   bool ok;
3413   tree scalar_type;
3414
3415   if (vect_debug_details (NULL))
3416     fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3417
3418   for (i = 0; i < nbbs; i++)
3419     {
3420       basic_block bb = bbs[i];
3421
3422       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3423         {
3424           tree stmt = bsi_stmt (si);
3425           unsigned int nunits;
3426           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3427           tree vectype;
3428
3429           if (vect_debug_details (NULL))
3430             {
3431               fprintf (dump_file, "==> examining statement: ");
3432               print_generic_expr (dump_file, stmt, TDF_SLIM);
3433             }
3434
3435           gcc_assert (stmt_info);
3436
3437           /* skip stmts which do not need to be vectorized.
3438              this is expected to include:
3439              - the COND_EXPR which is the loop exit condition
3440              - any LABEL_EXPRs in the loop
3441              - computations that are used only for array indexing or loop
3442              control  */
3443
3444           if (!STMT_VINFO_RELEVANT_P (stmt_info))
3445             {
3446               if (vect_debug_details (NULL))
3447                 fprintf (dump_file, "irrelevant.");
3448               continue;
3449             }
3450
3451           if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3452             {
3453               if (vect_debug_stats (loop) || vect_debug_details (loop))
3454                 {
3455                   fprintf (dump_file, "not vectorized: vector stmt in loop:");
3456                   print_generic_expr (dump_file, stmt, TDF_SLIM);
3457                 }
3458               return false;
3459             }
3460
3461           if (STMT_VINFO_DATA_REF (stmt_info))
3462             scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3463           else if (TREE_CODE (stmt) == MODIFY_EXPR)
3464             scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3465           else
3466             scalar_type = TREE_TYPE (stmt);
3467
3468           if (vect_debug_details (NULL))
3469             {
3470               fprintf (dump_file, "get vectype for scalar type:  ");
3471               print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3472             }
3473
3474           vectype = get_vectype_for_scalar_type (scalar_type);
3475           if (!vectype)
3476             {
3477               if (vect_debug_stats (loop) || vect_debug_details (loop))
3478                 {
3479                   fprintf (dump_file, "not vectorized: unsupported data-type ");
3480                   print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3481                 }
3482               return false;
3483             }
3484
3485           if (vect_debug_details (NULL))
3486             {
3487               fprintf (dump_file, "vectype: ");
3488               print_generic_expr (dump_file, vectype, TDF_SLIM);
3489             }
3490           STMT_VINFO_VECTYPE (stmt_info) = vectype;
3491
3492           ok = (vectorizable_operation (stmt, NULL, NULL)
3493                 || vectorizable_assignment (stmt, NULL, NULL)
3494                 || vectorizable_load (stmt, NULL, NULL)
3495                 || vectorizable_store (stmt, NULL, NULL));
3496
3497           if (!ok)
3498             {
3499               if (vect_debug_stats (loop) || vect_debug_details (loop))
3500                 {
3501                   fprintf (dump_file, "not vectorized: stmt not supported: ");
3502                   print_generic_expr (dump_file, stmt, TDF_SLIM);
3503                 }
3504               return false;
3505             }
3506
3507           nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3508           if (vect_debug_details (NULL))
3509             fprintf (dump_file, "nunits = %d", nunits);
3510
3511           if (vectorization_factor)
3512             {
3513               /* FORNOW: don't allow mixed units.
3514                  This restriction will be relaxed in the future.  */
3515               if (nunits != vectorization_factor)
3516                 {
3517                   if (vect_debug_stats (loop) || vect_debug_details (loop))
3518                     fprintf (dump_file, "not vectorized: mixed data-types");
3519                   return false;
3520                 }
3521             }
3522           else
3523             vectorization_factor = nunits;
3524
3525 #ifdef ENABLE_CHECKING
3526           gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3527                         * vectorization_factor == UNITS_PER_SIMD_WORD);
3528 #endif
3529         }
3530     }
3531
3532   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
3533
3534   if (vectorization_factor <= 1)
3535     {
3536       if (vect_debug_stats (loop) || vect_debug_details (loop))
3537         fprintf (dump_file, "not vectorized: unsupported data-type");
3538       return false;
3539     }
3540   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3541
3542   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3543     fprintf (dump_file,
3544         "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3545         vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3546
3547   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3548       && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3549     {
3550       if (vect_debug_stats (loop) || vect_debug_details (loop))
3551         fprintf (dump_file, "not vectorized: iteration count too small.");
3552       return false;
3553     }
3554
3555   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3556       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3557     {
3558       if (vect_debug_stats (loop) || vect_debug_details (loop))
3559         fprintf (dump_file, "epilog loop required.");
3560       if (!vect_can_advance_ivs_p (loop))
3561         {
3562           if (vect_debug_stats (loop) || vect_debug_details (loop))
3563             fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3564           return false;
3565         }
3566       if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3567         {
3568           if (vect_debug_stats (loop) || vect_debug_details (loop))
3569             fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3570           return false;
3571         }
3572     }
3573
3574   return true;
3575 }
3576
3577
3578 /* Function exist_non_indexing_operands_for_use_p
3579
3580    USE is one of the uses attached to STMT. Check if USE is
3581    used in STMT for anything other than indexing an array.  */
3582
3583 static bool
3584 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3585 {
3586   tree operand;
3587   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3588
3589   /* USE corresponds to some operand in STMT. If there is no data
3590      reference in STMT, then any operand that corresponds to USE
3591      is not indexing an array.  */
3592   if (!STMT_VINFO_DATA_REF (stmt_info))
3593     return true;
3594
3595   /* STMT has a data_ref. FORNOW this means that its of one of
3596      the following forms:
3597      -1- ARRAY_REF = var
3598      -2- var = ARRAY_REF
3599      (This should have been verified in analyze_data_refs).
3600
3601      'var' in the second case corresponds to a def, not a use,
3602      so USE cannot correspond to any operands that are not used
3603      for array indexing.
3604
3605      Therefore, all we need to check is if STMT falls into the
3606      first case, and whether var corresponds to USE.  */
3607
3608   if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3609     return false;
3610
3611   operand = TREE_OPERAND (stmt, 1);
3612
3613   if (TREE_CODE (operand) != SSA_NAME)
3614     return false;
3615
3616   if (operand == use)
3617     return true;
3618
3619   return false;
3620 }
3621
3622
3623 /* Function vect_is_simple_iv_evolution.
3624
3625    FORNOW: A simple evolution of an induction variables in the loop is
3626    considered a polynomial evolution with constant step.  */
3627
3628 static bool
3629 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3630                                 tree * step, bool strict)
3631 {
3632   tree init_expr;
3633   tree step_expr;
3634
3635   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3636
3637   /* When there is no evolution in this loop, the evolution function
3638      is not "simple".  */
3639   if (evolution_part == NULL_TREE)
3640     return false;
3641
3642   /* When the evolution is a polynomial of degree >= 2
3643      the evolution function is not "simple".  */
3644   if (tree_is_chrec (evolution_part))
3645     return false;
3646
3647   step_expr = evolution_part;
3648   init_expr = unshare_expr (initial_condition (access_fn));
3649
3650   if (vect_debug_details (NULL))
3651     {
3652       fprintf (dump_file, "step: ");
3653       print_generic_expr (dump_file, step_expr, TDF_SLIM);
3654       fprintf (dump_file, ",  init: ");
3655       print_generic_expr (dump_file, init_expr, TDF_SLIM);
3656     }
3657
3658   *init = init_expr;
3659   *step = step_expr;
3660
3661   if (TREE_CODE (step_expr) != INTEGER_CST)
3662     {
3663       if (vect_debug_details (NULL))
3664         fprintf (dump_file, "step unknown.");
3665       return false;
3666     }
3667
3668   if (strict)
3669     if (!integer_onep (step_expr))
3670       {
3671         if (vect_debug_details (NULL))
3672           print_generic_expr (dump_file, step_expr, TDF_SLIM);
3673         return false;
3674       }
3675
3676   return true;
3677 }
3678
3679
3680 /* Function vect_analyze_scalar_cycles.
3681
3682    Examine the cross iteration def-use cycles of scalar variables, by
3683    analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3684    cycles that they represent do not impede vectorization.
3685
3686    FORNOW: Reduction as in the following loop, is not supported yet:
3687               loop1:
3688               for (i=0; i<N; i++)
3689                  sum += a[i];
3690            The cross-iteration cycle corresponding to variable 'sum' will be
3691            considered too complicated and will impede vectorization.
3692
3693    FORNOW: Induction as in the following loop, is not supported yet:
3694               loop2:
3695               for (i=0; i<N; i++)
3696                  a[i] = i;
3697
3698            However, the following loop *is* vectorizable:
3699               loop3:
3700               for (i=0; i<N; i++)
3701                  a[i] = b[i];
3702
3703            In both loops there exists a def-use cycle for the variable i:
3704               loop: i_2 = PHI (i_0, i_1)
3705                     a[i_2] = ...;
3706                     i_1 = i_2 + 1;
3707                     GOTO loop;
3708
3709            The evolution of the above cycle is considered simple enough,
3710            however, we also check that the cycle does not need to be
3711            vectorized, i.e - we check that the variable that this cycle
3712            defines is only used for array indexing or in stmts that do not
3713            need to be vectorized. This is not the case in loop2, but it
3714            *is* the case in loop3.  */
3715
3716 static bool
3717 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3718 {
3719   tree phi;
3720   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3721   basic_block bb = loop->header;
3722   tree dummy;
3723
3724   if (vect_debug_details (NULL))
3725     fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3726
3727   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3728     {
3729       tree access_fn = NULL;
3730
3731       if (vect_debug_details (NULL))
3732         {
3733           fprintf (dump_file, "Analyze phi: ");
3734           print_generic_expr (dump_file, phi, TDF_SLIM);
3735         }
3736
3737       /* Skip virtual phi's. The data dependences that are associated with
3738          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
3739
3740       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3741         {
3742           if (vect_debug_details (NULL))
3743             fprintf (dump_file, "virtual phi. skip.");
3744           continue;
3745         }
3746
3747       /* Analyze the evolution function.  */
3748
3749       /* FORNOW: The only scalar cross-iteration cycles that we allow are
3750          those of loop induction variables; This property is verified here.
3751
3752          Furthermore, if that induction variable is used in an operation
3753          that needs to be vectorized (i.e, is not solely used to index
3754          arrays and check the exit condition) - we do not support its
3755          vectorization yet. This property is verified in vect_is_simple_use,
3756          during vect_analyze_operations.  */
3757
3758       access_fn = /* instantiate_parameters
3759                      (loop,*/
3760          analyze_scalar_evolution (loop, PHI_RESULT (phi));
3761
3762       if (!access_fn)
3763         {
3764           if (vect_debug_stats (loop) || vect_debug_details (loop))
3765             fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3766           return false;
3767         }
3768
3769       if (vect_debug_details (NULL))
3770         {
3771            fprintf (dump_file, "Access function of PHI: ");
3772            print_generic_expr (dump_file, access_fn, TDF_SLIM);
3773         }
3774
3775       if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3776                                         &dummy, false))
3777         {
3778           if (vect_debug_stats (loop) || vect_debug_details (loop))
3779             fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3780           return false;
3781         }
3782     }
3783
3784   return true;
3785 }
3786
3787
3788 /* Function vect_analyze_data_ref_dependence.
3789
3790    Return TRUE if there (might) exist a dependence between a memory-reference
3791    DRA and a memory-reference DRB.  */
3792
3793 static bool
3794 vect_analyze_data_ref_dependence (struct data_reference *dra,
3795                                   struct data_reference *drb,
3796                                   struct loop *loop)
3797 {
3798   bool differ_p;
3799   struct data_dependence_relation *ddr;
3800
3801   if (!array_base_name_differ_p (dra, drb, &differ_p))
3802     {
3803       if (vect_debug_stats (loop) || vect_debug_details (loop))
3804         {
3805           fprintf (dump_file,
3806                 "not vectorized: can't determine dependence between: ");
3807           print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3808           fprintf (dump_file, " and ");
3809           print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3810         }
3811       return true;
3812     }
3813
3814   if (differ_p)
3815     return false;
3816
3817   ddr = initialize_data_dependence_relation (dra, drb);
3818   compute_affine_dependence (ddr);
3819
3820   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3821     return false;
3822
3823   if (vect_debug_stats (loop) || vect_debug_details (loop))
3824     {
3825       fprintf (dump_file,
3826         "not vectorized: possible dependence between data-refs ");
3827       print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3828       fprintf (dump_file, " and ");
3829       print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3830     }
3831
3832   return true;
3833 }
3834
3835
3836 /* Function vect_analyze_data_ref_dependences.
3837
3838    Examine all the data references in the loop, and make sure there do not
3839    exist any data dependences between them.
3840
3841    TODO: dependences which distance is greater than the vectorization factor
3842          can be ignored.  */
3843
3844 static bool
3845 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3846 {
3847   unsigned int i, j;
3848   varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3849   varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3850   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3851
3852   /* Examine store-store (output) dependences.  */
3853
3854   if (vect_debug_details (NULL))
3855     fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3856
3857   if (vect_debug_details (NULL))
3858     fprintf (dump_file, "compare all store-store pairs.");
3859
3860   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3861     {
3862       for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3863         {
3864           struct data_reference *dra =
3865             VARRAY_GENERIC_PTR (loop_write_refs, i);
3866           struct data_reference *drb =
3867             VARRAY_GENERIC_PTR (loop_write_refs, j);
3868           if (vect_analyze_data_ref_dependence (dra, drb, loop))
3869             return false;
3870         }
3871     }
3872
3873   /* Examine load-store (true/anti) dependences.  */
3874
3875   if (vect_debug_details (NULL))
3876     fprintf (dump_file, "compare all load-store pairs.");
3877
3878   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3879     {
3880       for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3881         {
3882           struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3883           struct data_reference *drb =
3884             VARRAY_GENERIC_PTR (loop_write_refs, j);
3885           if (vect_analyze_data_ref_dependence (dra, drb, loop))
3886             return false;
3887         }
3888     }
3889
3890   return true;
3891 }
3892
3893
3894 /* Function vect_get_first_index.
3895
3896    REF is a data reference.
3897    If it is an ARRAY_REF: if its lower bound is simple enough,
3898    put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3899    If it is not an ARRAY_REF: REF has no "first index";
3900    ARRAY_FIRST_INDEX in zero, and the function returns TRUE.  */
3901
3902 static bool
3903 vect_get_first_index (tree ref, tree *array_first_index)
3904 {
3905   tree array_start;
3906
3907   if (TREE_CODE (ref) != ARRAY_REF)
3908     *array_first_index = size_zero_node;
3909   else
3910     {
3911       array_start = array_ref_low_bound (ref);
3912       if (!host_integerp (array_start, 0))
3913         {
3914           if (vect_debug_details (NULL))
3915             {
3916               fprintf (dump_file, "array min val not simple integer cst.");
3917               print_generic_expr (dump_file, array_start, TDF_DETAILS);
3918             }
3919           return false;
3920         }
3921       *array_first_index = array_start;
3922     }
3923
3924   return true;
3925 }
3926
3927
3928 /* Function vect_compute_array_base_alignment.
3929    A utility function of vect_compute_array_ref_alignment.
3930
3931    Compute the misalignment of ARRAY in bits.
3932
3933    Input:
3934    ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3935    VECTYPE - we are interested in the misalignment modulo the size of vectype.
3936              if NULL: don't compute misalignment, just return the base of ARRAY.
3937    PREV_DIMENSIONS - initialized to one.
3938    MISALIGNMENT - the computed misalignment in bits.
3939
3940    Output:
3941    If VECTYPE is not NULL:
3942      Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3943      the base of the array, and put the computed misalignment in MISALIGNMENT.
3944    If VECTYPE is NULL:
3945      Return the base of the array.
3946
3947    For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3948    a[idx_N]...[idx_2][idx_1] is
3949    {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3950     ... + idx_N * dim_0 * ... * dim_N-1}.
3951    (The misalignment of &a is not checked here).
3952    Note, that every term contains dim_0, therefore, if dim_0 is a
3953    multiple of NUNITS, the whole sum is a multiple of NUNITS.
3954    Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3955    NUINTS, we can say that the misalignment of the sum is equal to
3956    the misalignment of {idx_1 * dim_0}.  If idx_1 is not constant,
3957    we can't determine this array misalignment, and we return
3958    false.
3959    We proceed recursively in this manner, accumulating total misalignment
3960    and the multiplication of previous dimensions for correct misalignment
3961    calculation.  */
3962
3963 static tree
3964 vect_compute_array_base_alignment (tree array,
3965                                    tree vectype,
3966                                    tree *prev_dimensions,
3967                                    tree *misalignment)
3968 {
3969   tree index;
3970   tree domain;
3971   tree dimension_size;
3972   tree mis;
3973   tree bits_per_vectype;
3974   tree bits_per_vectype_unit;
3975
3976   /* The 'stop condition' of the recursion.  */
3977   if (TREE_CODE (array) != ARRAY_REF)
3978     return array;
3979
3980   if (!vectype)
3981     /* Just get the base decl.  */
3982     return vect_compute_array_base_alignment
3983                 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
3984
3985   if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
3986       !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
3987     return NULL_TREE;
3988
3989   domain = TYPE_DOMAIN (TREE_TYPE (array));
3990   dimension_size =
3991         int_const_binop (PLUS_EXPR,
3992                 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
3993                                              TYPE_MIN_VALUE (domain), 1),
3994                 size_one_node, 1);
3995
3996   /* Check if the dimension size is a multiple of NUNITS, the remaining sum
3997      is a multiple of NUNITS:
3998
3999      dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4000    */
4001   mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4002          build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4003   if (integer_zerop (mis))
4004     /* This array is aligned. Continue just in order to get the base decl.  */
4005     return vect_compute_array_base_alignment
4006                 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4007
4008   index = TREE_OPERAND (array, 1);
4009   if (!host_integerp (index, 1))
4010     /* The current index is not constant.  */
4011     return NULL_TREE;
4012
4013   index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4014
4015   bits_per_vectype = fold_convert (unsigned_type_node,
4016     build_int_cst (NULL_TREE, BITS_PER_UNIT *
4017                  GET_MODE_SIZE (TYPE_MODE (vectype))));
4018   bits_per_vectype_unit =  fold_convert (unsigned_type_node,
4019     build_int_cst (NULL_TREE, BITS_PER_UNIT *
4020                  GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4021
4022   /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4023      earlier:
4024
4025      *misalignment =
4026        (*misalignment + index_val * dimension_size * *prev_dimensions)
4027                                                         % vectype_nunits;
4028    */
4029
4030   mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4031   mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4032   mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4033   mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4034   *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4035
4036
4037   *prev_dimensions = int_const_binop (MULT_EXPR,
4038                                 *prev_dimensions, dimension_size, 1);
4039
4040   return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4041                                             prev_dimensions,
4042                                             misalignment);
4043 }
4044
4045
4046 /* Function vect_compute_data_ref_alignment
4047
4048    Compute the misalignment of the data reference DR.
4049
4050    Output:
4051    1. If during the misalignment computation it is found that the data reference
4052       cannot be vectorized then false is returned.
4053    2. DR_MISALIGNMENT (DR) is defined.
4054
4055    FOR NOW: No analysis is actually performed. Misalignment is calculated
4056    only for trivial cases. TODO.  */
4057
4058 static bool
4059 vect_compute_data_ref_alignment (struct data_reference *dr,
4060                                  loop_vec_info loop_vinfo)
4061 {
4062   tree stmt = DR_STMT (dr);
4063   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4064   tree ref = DR_REF (dr);
4065   tree vectype;
4066   tree scalar_type;
4067   tree offset = size_zero_node;
4068   tree base, bit_offset, alignment;
4069   tree unit_bits = fold_convert (unsigned_type_node,
4070                                  build_int_cst (NULL_TREE, BITS_PER_UNIT));
4071   tree dr_base;
4072   bool base_aligned_p;
4073
4074   if (vect_debug_details (NULL))
4075     fprintf (dump_file, "vect_compute_data_ref_alignment:");
4076
4077   /* Initialize misalignment to unknown.  */
4078   DR_MISALIGNMENT (dr) = -1;
4079
4080   scalar_type = TREE_TYPE (ref);
4081   vectype = get_vectype_for_scalar_type (scalar_type);
4082   if (!vectype)
4083     {
4084       if (vect_debug_details (NULL))
4085         {
4086           fprintf (dump_file, "no vectype for stmt: ");
4087           print_generic_expr (dump_file, stmt, TDF_SLIM);
4088           fprintf (dump_file, " scalar_type: ");
4089           print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4090         }
4091       /* It is not possible to vectorize this data reference.  */
4092       return false;
4093     }
4094   STMT_VINFO_VECTYPE (stmt_info) = vectype;
4095   gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4096
4097   if (TREE_CODE (ref) == ARRAY_REF)
4098     dr_base = ref;
4099   else
4100     dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4101
4102   base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4103                           loop_vinfo, &bit_offset, &base_aligned_p);
4104   if (!base)
4105     {
4106       if (vect_debug_details (NULL))
4107         {
4108           fprintf (dump_file, "Unknown alignment for access: ");
4109           print_generic_expr (dump_file,
4110                               STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4111         }
4112       return true;
4113     }
4114
4115   if (!base_aligned_p)
4116     {
4117       if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4118         {
4119           if (vect_debug_details (NULL))
4120             {
4121               fprintf (dump_file, "can't force alignment of ref: ");
4122               print_generic_expr (dump_file, ref, TDF_SLIM);
4123             }
4124           return true;
4125         }
4126
4127       /* Force the alignment of the decl.
4128          NOTE: This is the only change to the code we make during
4129          the analysis phase, before deciding to vectorize the loop.  */
4130       if (vect_debug_details (NULL))
4131         fprintf (dump_file, "force alignment");
4132       DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4133       DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4134     }
4135
4136   /* At this point we assume that the base is aligned, and the offset from it
4137      (including index, if relevant) has been computed and is in BIT_OFFSET.  */
4138   gcc_assert (base_aligned_p
4139               || (TREE_CODE (base) == VAR_DECL
4140                   && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4141
4142   /* Convert into bytes.  */
4143   offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4144   /* Check that there is no remainder in bits.  */
4145   bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4146   if (!integer_zerop (bit_offset))
4147     {
4148       if (vect_debug_details (NULL))
4149         {
4150           fprintf (dump_file, "bit offset alignment: ");
4151           print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4152         }
4153       return false;
4154     }
4155
4156   /* Alignment required, in bytes:  */
4157   alignment = fold_convert (unsigned_type_node,
4158             build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4159
4160   /* Modulo alignment.  */
4161   offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4162   if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4163     {
4164       if (vect_debug_details (NULL))
4165         fprintf (dump_file, "unexpected misalign value");
4166       return false;
4167     }
4168
4169   DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4170
4171   if (vect_debug_details (NULL))
4172     fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4173
4174   return true;
4175 }
4176
4177
4178 /* Function vect_compute_array_ref_alignment
4179
4180    Compute the alignment of an array-ref.
4181    The alignment we compute here is relative to
4182    TYPE_ALIGN(VECTYPE) boundary.
4183
4184    Output:
4185    OFFSET - the alignment in bits
4186    Return value - the base of the array-ref. E.g,
4187                   if the array-ref is a.b[k].c[i][j] the returned
4188                   base is a.b[k].c
4189 */
4190
4191 static tree
4192 vect_compute_array_ref_alignment (struct data_reference *dr,
4193                                   loop_vec_info loop_vinfo,
4194                                   tree vectype,
4195                                   tree *offset)
4196 {
4197   tree array_first_index = size_zero_node;
4198   tree init;
4199   tree ref = DR_REF (dr);
4200   tree scalar_type = TREE_TYPE (ref);
4201   tree oprnd0 = TREE_OPERAND (ref, 0);
4202   tree dims = size_one_node;
4203   tree misalign = size_zero_node;
4204   tree next_ref, this_offset = size_zero_node;
4205   tree nunits;
4206   tree nbits;
4207
4208   if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4209     /* The reference is an array without its last index.  */
4210     next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4211                                                   &misalign);
4212   else
4213     next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4214                                                   &misalign);
4215   if (!vectype)
4216     /* Alignment is not requested. Just return the base.  */
4217     return next_ref;
4218
4219   /* Compute alignment.  */
4220   if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4221     return NULL_TREE;
4222   this_offset = misalign;
4223
4224   /* Check the first index accessed.  */
4225   if (!vect_get_first_index (ref, &array_first_index))
4226     {
4227       if (vect_debug_details (NULL))
4228         fprintf (dump_file, "no first_index for array.");
4229       return NULL_TREE;
4230     }
4231
4232   /* Check the index of the array_ref.  */
4233   init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4234                                         LOOP_VINFO_LOOP (loop_vinfo)->num);
4235
4236   /* FORNOW: In order to simplify the handling of alignment, we make sure
4237      that the first location at which the array is accessed ('init') is on an
4238      'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4239      This is too conservative, since we require that
4240      both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4241      NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4242      This should be relaxed in the future.  */
4243
4244   if (!init || !host_integerp (init, 0))
4245     {
4246       if (vect_debug_details (NULL))
4247         fprintf (dump_file, "non constant init. ");
4248       return NULL_TREE;
4249     }
4250
4251   /* bytes per scalar element: */
4252   nunits = fold_convert (unsigned_type_node,
4253         build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4254   nbits = int_const_binop (MULT_EXPR, nunits,
4255                            build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4256
4257   /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4258   misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4259   misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4260   misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4261
4262   /* TODO: allow negative misalign values.  */
4263   if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4264     {
4265       if (vect_debug_details (NULL))
4266         fprintf (dump_file, "unexpected misalign value");
4267       return NULL_TREE;
4268     }
4269   *offset = misalign;
4270   return next_ref;
4271 }
4272
4273
4274 /* Function vect_compute_data_refs_alignment
4275
4276    Compute the misalignment of data references in the loop.
4277    This pass may take place at function granularity instead of at loop
4278    granularity.
4279
4280    FOR NOW: No analysis is actually performed. Misalignment is calculated
4281    only for trivial cases. TODO.  */
4282
4283 static bool
4284 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4285 {
4286   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4287   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4288   unsigned int i;
4289
4290   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4291     {
4292       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4293       if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4294         return false;
4295     }
4296
4297   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4298     {
4299       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4300       if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4301         return false;
4302     }
4303
4304   return true;
4305 }
4306
4307
4308 /* Function vect_enhance_data_refs_alignment
4309
4310    This pass will use loop versioning and loop peeling in order to enhance
4311    the alignment of data references in the loop.
4312
4313    FOR NOW: we assume that whatever versioning/peeling takes place, only the
4314    original loop is to be vectorized; Any other loops that are created by
4315    the transformations performed in this pass - are not supposed to be
4316    vectorized. This restriction will be relaxed.  */
4317
4318 static void
4319 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4320 {
4321   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4322   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4323   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4324   unsigned int i;
4325
4326   /*
4327      This pass will require a cost model to guide it whether to apply peeling
4328      or versioning or a combination of the two. For example, the scheme that
4329      intel uses when given a loop with several memory accesses, is as follows:
4330      choose one memory access ('p') which alignment you want to force by doing
4331      peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4332      other accesses are not necessarily aligned, or (2) use loop versioning to
4333      generate one loop in which all accesses are aligned, and another loop in
4334      which only 'p' is necessarily aligned.
4335
4336      ("Automatic Intra-Register Vectorization for the Intel Architecture",
4337       Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4338       Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4339
4340      Devising a cost model is the most critical aspect of this work. It will
4341      guide us on which access to peel for, whether to use loop versioning, how
4342      many versions to create, etc. The cost model will probably consist of
4343      generic considerations as well as target specific considerations (on
4344      powerpc for example, misaligned stores are more painful than misaligned
4345      loads).
4346
4347      Here is the general steps involved in alignment enhancements:
4348
4349      -- original loop, before alignment analysis:
4350         for (i=0; i<N; i++){
4351           x = q[i];                     # DR_MISALIGNMENT(q) = unknown
4352           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
4353         }
4354
4355      -- After vect_compute_data_refs_alignment:
4356         for (i=0; i<N; i++){
4357           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4358           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
4359         }
4360
4361      -- Possibility 1: we do loop versioning:
4362      if (p is aligned) {
4363         for (i=0; i<N; i++){    # loop 1A
4364           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4365           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
4366         }
4367      }
4368      else {
4369         for (i=0; i<N; i++){    # loop 1B
4370           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4371           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
4372         }
4373      }
4374
4375      -- Possibility 2: we do loop peeling:
4376      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
4377         x = q[i];
4378         p[i] = y;
4379      }
4380      for (i = 3; i < N; i++){   # loop 2A
4381         x = q[i];                       # DR_MISALIGNMENT(q) = 0
4382         p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
4383      }
4384
4385      -- Possibility 3: combination of loop peeling and versioning:
4386      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
4387         x = q[i];
4388         p[i] = y;
4389      }
4390      if (p is aligned) {
4391         for (i = 3; i<N; i++){  # loop 3A
4392           x = q[i];                     # DR_MISALIGNMENT(q) = 0
4393           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
4394         }
4395      }
4396      else {
4397         for (i = 3; i<N; i++){  # loop 3B
4398           x = q[i];                     # DR_MISALIGNMENT(q) = 0
4399           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
4400         }
4401      }
4402
4403      These loops are later passed to loop_transform to be vectorized. The
4404      vectorizer will use the alignment information to guide the transformation
4405      (whether to generate regular loads/stores, or with special handling for
4406      misalignment).
4407    */
4408
4409   /* (1) Peeling to force alignment.  */
4410
4411   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4412      Considerations:
4413      + How many accesses will become aligned due to the peeling
4414      - How many accesses will become unaligned due to the peeling,
4415        and the cost of misaligned accesses.
4416      - The cost of peeling (the extra runtime checks, the increase
4417        in code size).
4418
4419      The scheme we use FORNOW: peel to force the alignment of the first
4420      misaligned store in the loop.
4421      Rationale: misaligned stores are not yet supported.
4422
4423      TODO: Use a better cost model.  */
4424
4425   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4426     {
4427       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4428       if (!aligned_access_p (dr))
4429         {
4430           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4431           LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4432           break;
4433         }
4434     }
4435
4436   if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4437     {
4438       if (vect_debug_details (loop))
4439         fprintf (dump_file, "Peeling for alignment will not be applied.");
4440       return;
4441     }
4442   else
4443     if (vect_debug_details (loop))
4444       fprintf (dump_file, "Peeling for alignment will be applied.");
4445
4446
4447   /* (1.2) Update the alignment info according to the peeling factor.
4448            If the misalignment of the DR we peel for is M, then the
4449            peeling factor is VF - M, and the misalignment of each access DR_i
4450            in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4451            If the misalignment of the DR we peel for is unknown, then the
4452            misalignment of each access DR_i in the loop is also unknown.
4453
4454            FORNOW: set the misalignment of the accesses to unknown even
4455                    if the peeling factor is known at compile time.
4456
4457            TODO: - if the peeling factor is known at compile time, use that
4458                    when updating the misalignment info of the loop DRs.
4459                  - consider accesses that are known to have the same
4460                    alignment, even if that alignment is unknown.  */
4461
4462   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4463     {
4464       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4465       if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4466         DR_MISALIGNMENT (dr) = 0;
4467       else
4468         DR_MISALIGNMENT (dr) = -1;
4469     }
4470   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4471     {
4472       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4473       if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4474         DR_MISALIGNMENT (dr) = 0;
4475       else
4476         DR_MISALIGNMENT (dr) = -1;
4477     }
4478 }
4479
4480
4481 /* Function vect_analyze_data_refs_alignment
4482
4483    Analyze the alignment of the data-references in the loop.
4484    FOR NOW: Until support for misliagned accesses is in place, only if all
4485    accesses are aligned can the loop be vectorized. This restriction will be
4486    relaxed.  */
4487
4488 static bool
4489 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4490 {
4491   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4492   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4493   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4494   enum dr_alignment_support supportable_dr_alignment;
4495   unsigned int i;
4496
4497   if (vect_debug_details (NULL))
4498     fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4499
4500
4501   /* This pass may take place at function granularity instead of at loop
4502      granularity.  */
4503
4504   if (!vect_compute_data_refs_alignment (loop_vinfo))
4505     {
4506       if (vect_debug_details (loop) || vect_debug_stats (loop))
4507         fprintf (dump_file,
4508                  "not vectorized: can't calculate alignment for data ref.");
4509       return false;
4510     }
4511
4512
4513   /* This pass will decide on using loop versioning and/or loop peeling in
4514      order to enhance the alignment of data references in the loop.  */
4515
4516   vect_enhance_data_refs_alignment (loop_vinfo);
4517
4518
4519   /* Finally, check that all the data references in the loop can be
4520      handled with respect to their alignment.  */
4521
4522   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4523     {
4524       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4525       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4526       if (!supportable_dr_alignment)
4527         {
4528           if (vect_debug_details (loop) || vect_debug_stats (loop))
4529             fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4530           return false;
4531         }
4532     }
4533   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4534     {
4535       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4536       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4537       if (!supportable_dr_alignment)
4538         {
4539           if (vect_debug_details (loop) || vect_debug_stats (loop))
4540             fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4541           return false;
4542         }
4543     }
4544
4545   return true;
4546 }
4547
4548
4549 /* Function vect_analyze_data_ref_access.
4550
4551    Analyze the access pattern of the data-reference DR. For now, a data access
4552    has to consecutive and aligned to be considered vectorizable.  */
4553
4554 static bool
4555 vect_analyze_data_ref_access (struct data_reference *dr)
4556 {
4557   varray_type access_fns = DR_ACCESS_FNS (dr);
4558   tree access_fn;
4559   tree init, step;
4560   unsigned int dimensions, i;
4561
4562   /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4563      i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4564      access is contiguous).  */
4565   dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4566
4567   for (i = 1; i < dimensions; i++) /* Not including the last dimension.  */
4568     {
4569       access_fn = DR_ACCESS_FN (dr, i);
4570
4571       if (evolution_part_in_loop_num (access_fn,
4572                                       loop_containing_stmt (DR_STMT (dr))->num))
4573         {
4574           /* Evolution part is not NULL in this loop (it is neither constant
4575              nor invariant).  */
4576           if (vect_debug_details (NULL))
4577             {
4578               fprintf (dump_file,
4579                        "not vectorized: complicated multidim. array access.");
4580               print_generic_expr (dump_file, access_fn, TDF_SLIM);
4581             }
4582           return false;
4583         }
4584     }
4585
4586   access_fn = DR_ACCESS_FN (dr, 0); /*  The last dimension access function.  */
4587   if (!evolution_function_is_constant_p (access_fn)
4588       && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4589                                        access_fn, &init, &step, true))
4590     {
4591       if (vect_debug_details (NULL))
4592         {
4593           fprintf (dump_file, "not vectorized: complicated access function.");
4594           print_generic_expr (dump_file, access_fn, TDF_SLIM);
4595         }
4596       return false;
4597     }
4598
4599   return true;
4600 }
4601
4602
4603 /* Function vect_analyze_data_ref_accesses.
4604
4605    Analyze the access pattern of all the data references in the loop.
4606
4607    FORNOW: the only access pattern that is considered vectorizable is a
4608            simple step 1 (consecutive) access.
4609
4610    FORNOW: handle only arrays and pointer accesses.  */
4611
4612 static bool
4613 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4614 {
4615   unsigned int i;
4616   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4617   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4618
4619   if (vect_debug_details (NULL))
4620     fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4621
4622   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4623     {
4624       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4625       bool ok = vect_analyze_data_ref_access (dr);
4626       if (!ok)
4627         {
4628           if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4629               || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4630             fprintf (dump_file, "not vectorized: complicated access pattern.");
4631           return false;
4632         }
4633     }
4634
4635   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4636     {
4637       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4638       bool ok = vect_analyze_data_ref_access (dr);
4639       if (!ok)
4640         {
4641           if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4642               || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4643             fprintf (dump_file, "not vectorized: complicated access pattern.");
4644           return false;
4645         }
4646     }
4647
4648   return true;
4649 }
4650
4651
4652 /* Function vect_analyze_pointer_ref_access.
4653
4654    Input:
4655    STMT - a stmt that contains a data-ref
4656    MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4657
4658    If the data-ref access is vectorizable, return a data_reference structure
4659    that represents it (DR). Otherwise - return NULL.  */
4660
4661 static struct data_reference *
4662 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4663 {
4664   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4665   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4666   tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4667   tree init, step;
4668   int step_val;
4669   tree reftype, innertype;
4670   enum machine_mode innermode;
4671   tree indx_access_fn;
4672   int loopnum = loop->num;
4673   struct data_reference *dr;
4674
4675   if (!access_fn)
4676     {
4677       if (vect_debug_stats (loop) || vect_debug_details (loop))
4678         fprintf (dump_file, "not vectorized: complicated pointer access.");
4679       return NULL;
4680     }
4681
4682   if (vect_debug_details (NULL))
4683     {
4684       fprintf (dump_file, "Access function of ptr: ");
4685       print_generic_expr (dump_file, access_fn, TDF_SLIM);
4686     }
4687
4688   if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4689     {
4690       if (vect_debug_stats (loop) || vect_debug_details (loop))
4691         fprintf (dump_file, "not vectorized: pointer access is not simple.");
4692       return NULL;
4693     }
4694
4695   STRIP_NOPS (init);
4696
4697   if (!host_integerp (step,0))
4698     {
4699       if (vect_debug_stats (loop) || vect_debug_details (loop))
4700         fprintf (dump_file,
4701                 "not vectorized: non constant step for pointer access.");
4702       return NULL;
4703     }
4704
4705   step_val = TREE_INT_CST_LOW (step);
4706
4707   reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4708   if (TREE_CODE (reftype) != POINTER_TYPE)
4709     {
4710       if (vect_debug_stats (loop) || vect_debug_details (loop))
4711         fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4712       return NULL;
4713     }
4714
4715   reftype = TREE_TYPE (init);
4716   if (TREE_CODE (reftype) != POINTER_TYPE)
4717     {
4718       if (vect_debug_stats (loop) || vect_debug_details (loop))
4719         fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4720       return NULL;
4721     }
4722
4723   innertype = TREE_TYPE (reftype);
4724   innermode = TYPE_MODE (innertype);
4725   if (GET_MODE_SIZE (innermode) != step_val)
4726     {
4727       /* FORNOW: support only consecutive access */
4728       if (vect_debug_stats (loop) || vect_debug_details (loop))
4729         fprintf (dump_file, "not vectorized: non consecutive access.");
4730       return NULL;
4731     }
4732
4733   indx_access_fn =
4734         build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4735   if (vect_debug_details (NULL))
4736     {
4737       fprintf (dump_file, "Access function of ptr indx: ");
4738       print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4739     }
4740   dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4741   return dr;
4742 }
4743
4744
4745 /* Function vect_get_symbl_and_dr.
4746
4747    The function returns SYMBL - the relevant variable for
4748    memory tag (for aliasing purposes).
4749    Also data reference structure DR is created.
4750
4751    Input:
4752    MEMREF - data reference in STMT
4753    IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4754
4755    Output:
4756    DR - data_reference struct for MEMREF
4757    return value - the relevant variable for memory tag (for aliasing purposes).
4758
4759 */
4760
4761 static tree
4762 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4763                        loop_vec_info loop_vinfo, struct data_reference **dr)
4764 {
4765   tree symbl, oprnd0, oprnd1;
4766   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4767   tree offset;
4768   tree array_base, base;
4769   struct data_reference *new_dr;
4770   bool base_aligned_p;
4771
4772   *dr = NULL;
4773   switch (TREE_CODE (memref))
4774     {
4775     case INDIRECT_REF:
4776       new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4777       if (! new_dr)
4778         return NULL_TREE;
4779       *dr = new_dr;
4780       symbl = DR_BASE_NAME (new_dr);
4781       STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4782
4783       switch (TREE_CODE (symbl))
4784         {
4785         case PLUS_EXPR:
4786         case MINUS_EXPR:
4787           oprnd0 = TREE_OPERAND (symbl, 0);
4788           oprnd1 = TREE_OPERAND (symbl, 1);
4789
4790           STRIP_NOPS(oprnd1);
4791           /* Only {address_base + offset} expressions are supported,
4792              where address_base can be POINTER_TYPE or ARRAY_TYPE and
4793              offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4794              TODO: swap operands if {offset + address_base}.  */
4795           if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4796                && TREE_CODE (oprnd1) != INTEGER_CST)
4797               || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4798             return NULL_TREE;
4799
4800           if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4801             symbl = oprnd0;
4802           else
4803             symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4804                                            loop_vinfo, &new_dr);
4805
4806         case SSA_NAME:
4807         case ADDR_EXPR:
4808           /* symbl remains unchanged.  */
4809           break;
4810
4811         default:
4812           if (vect_debug_details (NULL))
4813             {
4814               fprintf (dump_file, "unhandled data ref: ");
4815               print_generic_expr (dump_file, memref, TDF_SLIM);
4816               fprintf (dump_file, " (symbl ");
4817               print_generic_expr (dump_file, symbl, TDF_SLIM);
4818               fprintf (dump_file, ") in stmt  ");
4819               print_generic_expr (dump_file, stmt, TDF_SLIM);
4820             }
4821           return NULL_TREE;
4822         }
4823       break;
4824
4825     case ARRAY_REF:
4826       offset = size_zero_node;
4827
4828       /* Store the array base in the stmt info.
4829          For one dimensional array ref a[i], the base is a,
4830          for multidimensional a[i1][i2]..[iN], the base is
4831          a[i1][i2]..[iN-1].  */
4832       array_base = TREE_OPERAND (memref, 0);
4833       STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4834
4835       new_dr = analyze_array (stmt, memref, is_read);
4836       *dr = new_dr;
4837
4838       /* Find the relevant symbol for aliasing purposes.  */
4839       base = DR_BASE_NAME (new_dr);
4840       switch (TREE_CODE (base))
4841         {
4842         case VAR_DECL:
4843           symbl = base;
4844           break;
4845
4846         case INDIRECT_REF:
4847           symbl = TREE_OPERAND (base, 0);
4848           break;
4849
4850         case COMPONENT_REF:
4851           /* Could have recorded more accurate information -
4852              i.e, the actual FIELD_DECL that is being referenced -
4853              but later passes expect VAR_DECL as the nmt.  */
4854           symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4855                                         loop_vinfo, &offset, &base_aligned_p);
4856           if (symbl)
4857             break;
4858           /* fall through */
4859         default:
4860           if (vect_debug_details (NULL))
4861             {
4862               fprintf (dump_file, "unhandled struct/class field access ");
4863               print_generic_expr (dump_file, stmt, TDF_SLIM);
4864             }
4865           return NULL_TREE;
4866         }
4867       break;
4868
4869     default:
4870       if (vect_debug_details (NULL))
4871         {
4872           fprintf (dump_file, "unhandled data ref: ");
4873           print_generic_expr (dump_file, memref, TDF_SLIM);
4874           fprintf (dump_file, " in stmt  ");
4875           print_generic_expr (dump_file, stmt, TDF_SLIM);
4876         }
4877       return NULL_TREE;
4878     }
4879   return symbl;
4880 }
4881
4882
4883 /* Function vect_analyze_data_refs.
4884
4885    Find all the data references in the loop.
4886
4887    FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4888            which base is really an array (not a pointer) and which alignment
4889            can be forced. This restriction will be relaxed.  */
4890
4891 static bool
4892 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4893 {
4894   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4895   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4896   int nbbs = loop->num_nodes;
4897   block_stmt_iterator si;
4898   int j;
4899   struct data_reference *dr;
4900   tree tag;
4901   tree address_base;
4902   bool base_aligned_p;
4903   tree offset;
4904
4905   if (vect_debug_details (NULL))
4906     fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4907
4908   for (j = 0; j < nbbs; j++)
4909     {
4910       basic_block bb = bbs[j];
4911       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4912         {
4913           bool is_read = false;
4914           tree stmt = bsi_stmt (si);
4915           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4916           v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4917           v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4918           vuse_optype vuses = STMT_VUSE_OPS (stmt);
4919           varray_type *datarefs = NULL;
4920           int nvuses, nv_may_defs, nv_must_defs;
4921           tree memref = NULL;
4922           tree symbl;
4923
4924           /* Assumption: there exists a data-ref in stmt, if and only if
4925              it has vuses/vdefs.  */
4926
4927           if (!vuses && !v_may_defs && !v_must_defs)
4928             continue;
4929
4930           nvuses = NUM_VUSES (vuses);
4931           nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4932           nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4933
4934           if (nvuses && (nv_may_defs || nv_must_defs))
4935             {
4936               if (vect_debug_details (NULL))
4937                 {
4938                   fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4939                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4940                 }
4941               return false;
4942             }
4943
4944           if (TREE_CODE (stmt) != MODIFY_EXPR)
4945             {
4946               if (vect_debug_details (NULL))
4947                 {
4948                   fprintf (dump_file, "unexpected vops in stmt: ");
4949                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4950                 }
4951               return false;
4952             }
4953
4954           if (vuses)
4955             {
4956               memref = TREE_OPERAND (stmt, 1);
4957               datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4958               is_read = true;
4959             }
4960           else /* vdefs */
4961             {
4962               memref = TREE_OPERAND (stmt, 0);
4963               datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4964               is_read = false;
4965             }
4966
4967           /* Analyze MEMREF. If it is of a supported form, build data_reference
4968              struct for it (DR) and find the relevant symbol for aliasing
4969              purposes.  */
4970           symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
4971                                          &dr);
4972           if (!symbl)
4973             {
4974               if (vect_debug_stats (loop) || vect_debug_details (loop))
4975                 {
4976                   fprintf (dump_file, "not vectorized: unhandled data ref: ");
4977                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4978                 }
4979               return false;
4980             }
4981
4982           /* Find and record the memtag assigned to this data-ref.  */
4983            switch (TREE_CODE (symbl))
4984             {
4985             case VAR_DECL:
4986               STMT_VINFO_MEMTAG (stmt_info) = symbl;
4987               break;
4988
4989             case SSA_NAME:
4990               symbl = SSA_NAME_VAR (symbl);
4991               tag = get_var_ann (symbl)->type_mem_tag;
4992               if (!tag)
4993                 {
4994                   tree ptr = TREE_OPERAND (memref, 0);
4995                   if (TREE_CODE (ptr) == SSA_NAME)
4996                     tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4997                 }
4998               if (!tag)
4999                 {
5000                   if (vect_debug_stats (loop) || vect_debug_details (loop))
5001                     fprintf (dump_file, "not vectorized: no memtag for ref.");
5002                   return false;
5003                 }
5004               STMT_VINFO_MEMTAG (stmt_info) = tag;
5005               break;
5006
5007             case ADDR_EXPR:
5008               address_base = TREE_OPERAND (symbl, 0);
5009
5010               switch (TREE_CODE (address_base))
5011                 {
5012                 case ARRAY_REF:
5013                   dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5014                                       DR_IS_READ(dr));
5015                   tag = vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr),
5016                            NULL_TREE, loop_vinfo, &offset, &base_aligned_p);
5017                   if (!tag)
5018                     {
5019                       if (vect_debug_stats (loop) || vect_debug_details (loop))
5020                         fprintf (dump_file, "not vectorized: no memtag for ref.");
5021                       return false;
5022                     }
5023                   STMT_VINFO_MEMTAG (stmt_info) = tag;
5024                   break;
5025
5026                 case VAR_DECL:
5027                   STMT_VINFO_MEMTAG (stmt_info) = address_base;
5028                   break;
5029
5030                 default:
5031                   if (vect_debug_stats (loop) || vect_debug_details (loop))
5032                     {
5033                       fprintf (dump_file,
5034                                "not vectorized: unhandled address expr: ");
5035                       print_generic_expr (dump_file, stmt, TDF_SLIM);
5036                     }
5037                   return false;
5038                 }
5039               break;
5040
5041             default:
5042               if (vect_debug_stats (loop) || vect_debug_details (loop))
5043                 {
5044                   fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5045                   print_generic_expr (dump_file, memref, TDF_SLIM);
5046                 }
5047               return false;
5048             }
5049
5050           VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5051           STMT_VINFO_DATA_REF (stmt_info) = dr;
5052         }
5053     }
5054
5055   return true;
5056 }
5057
5058
5059 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
5060
5061 /* Function vect_mark_relevant.
5062
5063    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
5064
5065 static void
5066 vect_mark_relevant (varray_type worklist, tree stmt)
5067 {
5068   stmt_vec_info stmt_info;
5069
5070   if (vect_debug_details (NULL))
5071     fprintf (dump_file, "mark relevant.");
5072
5073   if (TREE_CODE (stmt) == PHI_NODE)
5074     {
5075       VARRAY_PUSH_TREE (worklist, stmt);
5076       return;
5077     }
5078
5079   stmt_info = vinfo_for_stmt (stmt);
5080
5081   if (!stmt_info)
5082     {
5083       if (vect_debug_details (NULL))
5084         {
5085           fprintf (dump_file, "mark relevant: no stmt info!!.");
5086           print_generic_expr (dump_file, stmt, TDF_SLIM);
5087         }
5088       return;
5089     }
5090
5091   if (STMT_VINFO_RELEVANT_P (stmt_info))
5092     {
5093       if (vect_debug_details (NULL))
5094         fprintf (dump_file, "already marked relevant.");
5095       return;
5096     }
5097
5098   STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5099   VARRAY_PUSH_TREE (worklist, stmt);
5100 }
5101
5102
5103 /* Function vect_stmt_relevant_p.
5104
5105    Return true if STMT in loop that is represented by LOOP_VINFO is
5106    "relevant for vectorization".
5107
5108    A stmt is considered "relevant for vectorization" if:
5109    - it has uses outside the loop.
5110    - it has vdefs (it alters memory).
5111    - control stmts in the loop (except for the exit condition).
5112
5113    CHECKME: what other side effects would the vectorizer allow?  */
5114
5115 static bool
5116 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5117 {
5118   v_may_def_optype v_may_defs;
5119   v_must_def_optype v_must_defs;
5120   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5121   int i;
5122   dataflow_t df;
5123   int num_uses;
5124
5125   /* cond stmt other than loop exit cond.  */
5126   if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5127     return true;
5128
5129   /* changing memory.  */
5130   v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5131   v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5132   if (v_may_defs || v_must_defs)
5133     {
5134       if (vect_debug_details (NULL))
5135         fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5136       return true;
5137     }
5138
5139   /* uses outside the loop.  */
5140   df = get_immediate_uses (stmt);
5141   num_uses = num_immediate_uses (df);
5142   for (i = 0; i < num_uses; i++)
5143     {
5144       tree use = immediate_use (df, i);
5145       basic_block bb = bb_for_stmt (use);
5146       if (!flow_bb_inside_loop_p (loop, bb))
5147         {
5148           if (vect_debug_details (NULL))
5149             fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5150           return true;
5151         }
5152     }
5153
5154   return false;
5155 }
5156
5157
5158 /* Function vect_mark_stmts_to_be_vectorized.
5159
5160    Not all stmts in the loop need to be vectorized. For example:
5161
5162      for i...
5163        for j...
5164    1.    T0 = i + j
5165    2.    T1 = a[T0]
5166
5167    3.    j = j + 1
5168
5169    Stmt 1 and 3 do not need to be vectorized, because loop control and
5170    addressing of vectorized data-refs are handled differently.
5171
5172    This pass detects such stmts.  */
5173
5174 static bool
5175 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5176 {
5177   varray_type worklist;
5178   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5179   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5180   unsigned int nbbs = loop->num_nodes;
5181   block_stmt_iterator si;
5182   tree stmt;
5183   stmt_ann_t ann;
5184   unsigned int i;
5185   int j;
5186   use_optype use_ops;
5187   stmt_vec_info stmt_info;
5188
5189   if (vect_debug_details (NULL))
5190     fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5191
5192   VARRAY_TREE_INIT (worklist, 64, "work list");
5193
5194   /* 1. Init worklist.  */
5195
5196   for (i = 0; i < nbbs; i++)
5197     {
5198       basic_block bb = bbs[i];
5199       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5200         {
5201           stmt = bsi_stmt (si);
5202
5203           if (vect_debug_details (NULL))
5204             {
5205               fprintf (dump_file, "init: stmt relevant? ");
5206               print_generic_expr (dump_file, stmt, TDF_SLIM);
5207             }
5208
5209           stmt_info = vinfo_for_stmt (stmt);
5210           STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5211
5212           if (vect_stmt_relevant_p (stmt, loop_vinfo))
5213             vect_mark_relevant (worklist, stmt);
5214         }
5215     }
5216
5217
5218   /* 2. Process_worklist */
5219
5220   while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5221     {
5222       stmt = VARRAY_TOP_TREE (worklist);
5223       VARRAY_POP (worklist);
5224
5225       if (vect_debug_details (NULL))
5226         {
5227           fprintf (dump_file, "worklist: examine stmt: ");
5228           print_generic_expr (dump_file, stmt, TDF_SLIM);
5229         }
5230
5231       /* Examine the USES in this statement. Mark all the statements which
5232          feed this statement's uses as "relevant", unless the USE is used as
5233          an array index.  */
5234
5235       if (TREE_CODE (stmt) == PHI_NODE)
5236         {
5237           /* follow the def-use chain inside the loop.  */
5238           for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5239             {
5240               tree arg = PHI_ARG_DEF (stmt, j);
5241               tree def_stmt = NULL_TREE;
5242               basic_block bb;
5243               if (!vect_is_simple_use (arg, loop, &def_stmt))
5244                 {
5245                   if (vect_debug_details (NULL))
5246                     fprintf (dump_file, "worklist: unsupported use.");
5247                   varray_clear (worklist);
5248                   return false;
5249                 }
5250               if (!def_stmt)
5251                 continue;
5252
5253               if (vect_debug_details (NULL))
5254                 {
5255                   fprintf (dump_file, "worklist: def_stmt: ");
5256                   print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5257                 }
5258
5259               bb = bb_for_stmt (def_stmt);
5260               if (flow_bb_inside_loop_p (loop, bb))
5261                 vect_mark_relevant (worklist, def_stmt);
5262             }
5263         }
5264
5265       ann = stmt_ann (stmt);
5266       use_ops = USE_OPS (ann);
5267
5268       for (i = 0; i < NUM_USES (use_ops); i++)
5269         {
5270           tree use = USE_OP (use_ops, i);
5271
5272           /* We are only interested in uses that need to be vectorized. Uses
5273              that are used for address computation are not considered relevant.
5274            */
5275           if (exist_non_indexing_operands_for_use_p (use, stmt))
5276             {
5277               tree def_stmt = NULL_TREE;
5278               basic_block bb;
5279               if (!vect_is_simple_use (use, loop, &def_stmt))
5280                 {
5281                   if (vect_debug_details (NULL))
5282                     fprintf (dump_file, "worklist: unsupported use.");
5283                   varray_clear (worklist);
5284                   return false;
5285                 }
5286
5287               if (!def_stmt)
5288                 continue;
5289
5290               if (vect_debug_details (NULL))
5291                 {
5292                   fprintf (dump_file, "worklist: examine use %d: ", i);
5293                   print_generic_expr (dump_file, use, TDF_SLIM);
5294                 }
5295
5296               bb = bb_for_stmt (def_stmt);
5297               if (flow_bb_inside_loop_p (loop, bb))
5298                 vect_mark_relevant (worklist, def_stmt);
5299             }
5300         }
5301     }                           /* while worklist */
5302
5303   varray_clear (worklist);
5304   return true;
5305 }
5306
5307
5308 /* Function vect_can_advance_ivs_p
5309
5310    In case the number of iterations that LOOP iterates in unknown at compile
5311    time, an epilog loop will be generated, and the loop induction variables
5312    (IVs) will be "advanced" to the value they are supposed to take just before
5313    the epilog loop.  Here we check that the access function of the loop IVs
5314    and the expression that represents the loop bound are simple enough.
5315    These restrictions will be relaxed in the future.  */
5316
5317 static bool
5318 vect_can_advance_ivs_p (struct loop *loop)
5319 {
5320   basic_block bb = loop->header;
5321   tree phi;
5322
5323   /* Analyze phi functions of the loop header.  */
5324
5325   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5326     {
5327       tree access_fn = NULL;
5328       tree evolution_part;
5329
5330       if (vect_debug_details (NULL))
5331         {
5332           fprintf (dump_file, "Analyze phi: ");
5333           print_generic_expr (dump_file, phi, TDF_SLIM);
5334         }
5335
5336       /* Skip virtual phi's. The data dependences that are associated with
5337          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
5338
5339       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5340         {
5341           if (vect_debug_details (NULL))
5342             fprintf (dump_file, "virtual phi. skip.");
5343           continue;
5344         }
5345
5346       /* Analyze the evolution function.  */
5347
5348       access_fn = instantiate_parameters
5349         (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5350
5351       if (!access_fn)
5352         {
5353           if (vect_debug_details (NULL))
5354             fprintf (dump_file, "No Access function.");
5355           return false;
5356         }
5357
5358       if (vect_debug_details (NULL))
5359         {
5360           fprintf (dump_file, "Access function of PHI: ");
5361           print_generic_expr (dump_file, access_fn, TDF_SLIM);
5362         }
5363
5364       evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5365
5366       if (evolution_part == NULL_TREE)
5367         return false;
5368
5369       /* FORNOW: We do not transform initial conditions of IVs
5370          which evolution functions are a polynomial of degree >= 2.  */
5371
5372       if (tree_is_chrec (evolution_part))
5373         return false;
5374     }
5375
5376   return true;
5377 }
5378
5379
5380 /* Function vect_get_loop_niters.
5381
5382    Determine how many iterations the loop is executed.
5383    If an expression that represents the number of iterations
5384    can be constructed, place it in NUMBER_OF_ITERATIONS.
5385    Return the loop exit condition.  */
5386
5387 static tree
5388 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5389 {
5390   tree niters;
5391
5392   if (vect_debug_details (NULL))
5393     fprintf (dump_file, "\n<<get_loop_niters>>\n");
5394
5395   niters = number_of_iterations_in_loop (loop);
5396
5397   if (niters != NULL_TREE
5398       && niters != chrec_dont_know)
5399     {
5400       *number_of_iterations = niters;
5401
5402       if (vect_debug_details (NULL))
5403         {
5404           fprintf (dump_file, "==> get_loop_niters:" );
5405           print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5406         }
5407     }
5408
5409   return get_loop_exit_condition (loop);
5410 }
5411
5412
5413 /* Function vect_analyze_loop_form.
5414
5415    Verify the following restrictions (some may be relaxed in the future):
5416    - it's an inner-most loop
5417    - number of BBs = 2 (which are the loop header and the latch)
5418    - the loop has a pre-header
5419    - the loop has a single entry and exit
5420    - the loop exit condition is simple enough, and the number of iterations
5421      can be analyzed (a countable loop).  */
5422
5423 static loop_vec_info
5424 vect_analyze_loop_form (struct loop *loop)
5425 {
5426   loop_vec_info loop_vinfo;
5427   tree loop_cond;
5428   tree number_of_iterations = NULL;
5429   bool rescan = false;
5430
5431   if (vect_debug_details (loop))
5432     fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5433
5434   if (loop->inner
5435       || !loop->single_exit
5436       || loop->num_nodes != 2
5437       || EDGE_COUNT (loop->header->preds) != 2
5438       || loop->num_entries != 1)
5439     {
5440       if (vect_debug_stats (loop) || vect_debug_details (loop))
5441         {
5442           fprintf (dump_file, "not vectorized: bad loop form. ");
5443           if (loop->inner)
5444             fprintf (dump_file, "nested loop.");
5445           else if (!loop->single_exit)
5446             fprintf (dump_file, "multiple exits.");
5447           else if (loop->num_nodes != 2)
5448             fprintf (dump_file, "too many BBs in loop.");
5449           else if (EDGE_COUNT (loop->header->preds) != 2)
5450             fprintf (dump_file, "too many incoming edges.");
5451           else if (loop->num_entries != 1)
5452             fprintf (dump_file, "too many entries.");
5453         }
5454
5455       return NULL;
5456     }
5457
5458   /* We assume that the loop exit condition is at the end of the loop. i.e,
5459      that the loop is represented as a do-while (with a proper if-guard
5460      before the loop if needed), where the loop header contains all the
5461      executable statements, and the latch is empty.  */
5462   if (!empty_block_p (loop->latch))
5463     {
5464       if (vect_debug_stats (loop) || vect_debug_details (loop))
5465         fprintf (dump_file, "not vectorized: unexpectd loop form.");
5466       return NULL;
5467     }
5468
5469   /* Make sure we have a preheader basic block.  */
5470   if (!loop->pre_header)
5471     {
5472       rescan = true;
5473       loop_split_edge_with (loop_preheader_edge (loop), NULL);
5474     }
5475
5476   /* Make sure there exists a single-predecessor exit bb:  */
5477   if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5478     {
5479       rescan = true;
5480       loop_split_edge_with (loop->exit_edges[0], NULL);
5481     }
5482
5483   if (rescan)
5484     {
5485       flow_loop_scan (loop, LOOP_ALL);
5486       /* Flow loop scan does not update loop->single_exit field.  */
5487       loop->single_exit = loop->exit_edges[0];
5488     }
5489
5490   if (empty_block_p (loop->header))
5491     {
5492       if (vect_debug_stats (loop) || vect_debug_details (loop))
5493         fprintf (dump_file, "not vectorized: empty loop.");
5494       return NULL;
5495     }
5496
5497   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5498   if (!loop_cond)
5499     {
5500       if (vect_debug_stats (loop) || vect_debug_details (loop))
5501         fprintf (dump_file, "not vectorized: complicated exit condition.");
5502       return NULL;
5503     }
5504
5505   if (!number_of_iterations)
5506     {
5507       if (vect_debug_stats (loop) || vect_debug_details (loop))
5508         fprintf (dump_file,
5509                  "not vectorized: number of iterations cannot be computed.");
5510       return NULL;
5511     }
5512
5513   if (chrec_contains_undetermined (number_of_iterations))
5514     {
5515       if (vect_debug_details (NULL))
5516         fprintf (dump_file, "Infinite number of iterations.");
5517       return false;
5518     }
5519
5520   loop_vinfo = new_loop_vec_info (loop);
5521   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5522
5523   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5524     {
5525       if (vect_debug_details (loop))
5526         {
5527           fprintf (dump_file, "loop bound unknown.\n");
5528           fprintf (dump_file, "Symbolic number of iterations is ");
5529           print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5530         }
5531     }
5532   else
5533   if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5534     {
5535       if (vect_debug_stats (loop) || vect_debug_details (loop))
5536         fprintf (dump_file, "not vectorized: number of iterations = 0.");
5537       return NULL;
5538     }
5539
5540   LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5541
5542   return loop_vinfo;
5543 }
5544
5545
5546 /* Function vect_analyze_loop.
5547
5548    Apply a set of analyses on LOOP, and create a loop_vec_info struct
5549    for it. The different analyses will record information in the
5550    loop_vec_info struct.  */
5551
5552 static loop_vec_info
5553 vect_analyze_loop (struct loop *loop)
5554 {
5555   bool ok;
5556   loop_vec_info loop_vinfo;
5557
5558   if (vect_debug_details (NULL))
5559     fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5560
5561   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
5562
5563   loop_vinfo = vect_analyze_loop_form (loop);
5564   if (!loop_vinfo)
5565     {
5566       if (vect_debug_details (loop))
5567         fprintf (dump_file, "bad loop form.");
5568       return NULL;
5569     }
5570
5571   /* Find all data references in the loop (which correspond to vdefs/vuses)
5572      and analyze their evolution in the loop.
5573
5574      FORNOW: Handle only simple, array references, which
5575      alignment can be forced, and aligned pointer-references.  */
5576
5577   ok = vect_analyze_data_refs (loop_vinfo);
5578   if (!ok)
5579     {
5580       if (vect_debug_details (loop))
5581         fprintf (dump_file, "bad data references.");
5582       destroy_loop_vec_info (loop_vinfo);
5583       return NULL;
5584     }
5585
5586   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
5587
5588   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5589   if (!ok)
5590     {
5591       if (vect_debug_details (loop))
5592         fprintf (dump_file, "unexpected pattern.");
5593       if (vect_debug_details (loop))
5594         fprintf (dump_file, "not vectorized: unexpected pattern.");
5595       destroy_loop_vec_info (loop_vinfo);
5596       return NULL;
5597     }
5598
5599   /* Check that all cross-iteration scalar data-flow cycles are OK.
5600      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
5601
5602   ok = vect_analyze_scalar_cycles (loop_vinfo);
5603   if (!ok)
5604     {
5605       if (vect_debug_details (loop))
5606         fprintf (dump_file, "bad scalar cycle.");
5607       destroy_loop_vec_info (loop_vinfo);
5608       return NULL;
5609     }
5610
5611   /* Analyze data dependences between the data-refs in the loop.
5612      FORNOW: fail at the first data dependence that we encounter.  */
5613
5614   ok = vect_analyze_data_ref_dependences (loop_vinfo);
5615   if (!ok)
5616     {
5617       if (vect_debug_details (loop))
5618         fprintf (dump_file, "bad data dependence.");
5619       destroy_loop_vec_info (loop_vinfo);
5620       return NULL;
5621     }
5622
5623   /* Analyze the access patterns of the data-refs in the loop (consecutive,
5624      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
5625
5626   ok = vect_analyze_data_ref_accesses (loop_vinfo);
5627   if (!ok)
5628     {
5629       if (vect_debug_details (loop))
5630         fprintf (dump_file, "bad data access.");
5631       destroy_loop_vec_info (loop_vinfo);
5632       return NULL;
5633     }
5634
5635   /* Analyze the alignment of the data-refs in the loop.
5636      FORNOW: Only aligned accesses are handled.  */
5637
5638   ok = vect_analyze_data_refs_alignment (loop_vinfo);
5639   if (!ok)
5640     {
5641       if (vect_debug_details (loop))
5642         fprintf (dump_file, "bad data alignment.");
5643       destroy_loop_vec_info (loop_vinfo);
5644       return NULL;
5645     }
5646
5647   /* Scan all the operations in the loop and make sure they are
5648      vectorizable.  */
5649
5650   ok = vect_analyze_operations (loop_vinfo);
5651   if (!ok)
5652     {
5653       if (vect_debug_details (loop))
5654         fprintf (dump_file, "bad operation or unsupported loop bound.");
5655       destroy_loop_vec_info (loop_vinfo);
5656       return NULL;
5657     }
5658
5659   LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5660
5661   return loop_vinfo;
5662 }
5663
5664
5665 /* Function need_imm_uses_for.
5666
5667    Return whether we ought to include information for 'var'
5668    when calculating immediate uses.  For this pass we only want use
5669    information for non-virtual variables.  */
5670
5671 static bool
5672 need_imm_uses_for (tree var)
5673 {
5674   return is_gimple_reg (var);
5675 }
5676
5677
5678 /* Function vectorize_loops.
5679
5680    Entry Point to loop vectorization phase.  */
5681
5682 void
5683 vectorize_loops (struct loops *loops)
5684 {
5685   unsigned int i, loops_num;
5686   unsigned int num_vectorized_loops = 0;
5687
5688   /* Does the target support SIMD?  */
5689   /* FORNOW: until more sophisticated machine modelling is in place.  */
5690   if (!UNITS_PER_SIMD_WORD)
5691     {
5692       if (vect_debug_details (NULL))
5693         fprintf (dump_file, "vectorizer: target vector size is not defined.");
5694       return;
5695     }
5696
5697 #ifdef ENABLE_CHECKING
5698   verify_loop_closed_ssa ();
5699 #endif
5700
5701   compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5702
5703   /*  ----------- Analyze loops. -----------  */
5704
5705   /* If some loop was duplicated, it gets bigger number
5706      than all previously defined loops. This fact allows us to run
5707      only over initial loops skipping newly generated ones.  */
5708   loops_num = loops->num;
5709   for (i = 1; i < loops_num; i++)
5710     {
5711       loop_vec_info loop_vinfo;
5712       struct loop *loop = loops->parray[i];
5713
5714       if (!loop)
5715         continue;
5716
5717       loop_vinfo = vect_analyze_loop (loop);
5718       loop->aux = loop_vinfo;
5719
5720       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5721         continue;
5722
5723       vect_transform_loop (loop_vinfo, loops);
5724       num_vectorized_loops++;
5725     }
5726
5727   if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5728     fprintf (dump_file, "\nvectorized %u loops in function.\n",
5729              num_vectorized_loops);
5730
5731   /*  ----------- Finalize. -----------  */
5732
5733   free_df ();
5734   for (i = 1; i < loops_num; i++)
5735     {
5736       struct loop *loop = loops->parray[i];
5737       loop_vec_info loop_vinfo;
5738
5739       if (!loop)
5740         continue;
5741       loop_vinfo = loop->aux;
5742       destroy_loop_vec_info (loop_vinfo);
5743       loop->aux = NULL;
5744     }
5745
5746   rewrite_into_ssa (false);
5747   rewrite_into_loop_closed_ssa (); /* FORNOW */
5748   bitmap_clear (vars_to_rename);
5749 }