gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20 02111-1307, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "errors.h"
 128 #include "ggc.h"
 129 #include "tree.h"
 130 #include "target.h"
 131
 132 #include "rtl.h"
 133 #include "basic-block.h"
 134 #include "diagnostic.h"
 135 #include "tree-flow.h"
 136 #include "tree-dump.h"
 137 #include "timevar.h"
 138 #include "cfgloop.h"
 139 #include "cfglayout.h"
 140 #include "expr.h"
 141 #include "optabs.h"
 142 #include "toplev.h"
 143 #include "tree-chrec.h"
 144 #include "tree-data-ref.h"
 145 #include "tree-scalar-evolution.h"
 146 #include "tree-vectorizer.h"
 147 #include "tree-pass.h"
 148 #include "langhooks.h"
 149
 150
 151 /*************************************************************************
 152   Simple Loop Peeling Utilities
 153  *************************************************************************/
 154
 155 /* Entry point for peeling of simple loops.
 156    Peel the first/last iterations of a loop.
 157    It can be used outside of the vectorizer for loops that are simple enough
 158    (see function documentation).  In the vectorizer it is used to peel the
 159    last few iterations when the loop bound is unknown or does not evenly
 160    divide by the vectorization factor, and to peel the first few iterations
 161    to force the alignment of data references in the loop.  */
 162 struct loop *slpeel_tree_peel_loop_to_edge
 163   (struct loop *, struct loops *, edge, tree, tree, bool);
 164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 165   (struct loop *, struct loops *, edge);
 166 static void slpeel_update_phis_for_duplicate_loop
 167   (struct loop *, struct loop *, bool after);
 168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
 169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
 170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
 172 static void allocate_new_names (bitmap);
 173 static void rename_use_op (use_operand_p);
 174 static void rename_def_op (def_operand_p, tree);
 175 static void rename_variables_in_bb (basic_block);
 176 static void free_new_names (bitmap);
 177 static void rename_variables_in_loop (struct loop *);
 178 #ifdef ENABLE_CHECKING
 179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
 180 #endif
 181
 182
 183 /*************************************************************************
 184   Vectorization Utilities.
 185  *************************************************************************/
 186
 187 /* Main analysis functions.  */
 188 static loop_vec_info vect_analyze_loop (struct loop *);
 189 static loop_vec_info vect_analyze_loop_form (struct loop *);
 190 static bool vect_analyze_data_refs (loop_vec_info);
 191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
 192 static bool vect_analyze_scalar_cycles (loop_vec_info);
 193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
 194 static bool vect_analyze_data_refs_alignment (loop_vec_info);
 195 static bool vect_compute_data_refs_alignment (loop_vec_info);
 196 static bool vect_analyze_operations (loop_vec_info);
 197
 198 /* Main code transformation functions.  */
 199 static void vect_transform_loop (loop_vec_info, struct loops *);
 200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
 201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
 202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
 203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
 204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
 205 static enum dr_alignment_support vect_supportable_dr_alignment
 206   (struct data_reference *);
 207 static void vect_align_data_ref (tree);
 208 static void vect_enhance_data_refs_alignment (loop_vec_info);
 209
 210 /* Utility functions for the analyses.  */
 211 static bool vect_is_simple_use (tree , struct loop *, tree *);
 212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
 213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
 214 static void vect_mark_relevant (varray_type, tree);
 215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
 216 static tree vect_get_loop_niters (struct loop *, tree *);
 217 static bool vect_compute_data_ref_alignment
 218   (struct data_reference *, loop_vec_info);
 219 static bool vect_analyze_data_ref_access (struct data_reference *);
 220 static bool vect_get_first_index (tree, tree *);
 221 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
 222 static struct data_reference * vect_analyze_pointer_ref_access
 223   (tree, tree, bool);
 224 static bool vect_can_advance_ivs_p (struct loop *);
 225 static tree vect_get_base_and_bit_offset
 226   (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
 227 static struct data_reference * vect_analyze_pointer_ref_access
 228   (tree, tree, bool);
 229 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
 230 static tree vect_compute_array_ref_alignment
 231   (struct data_reference *, loop_vec_info, tree, tree *);
 232 static tree vect_get_ptr_offset (tree, tree, tree *);
 233 static tree vect_get_symbl_and_dr
 234   (tree, tree, bool, loop_vec_info, struct data_reference **);
 235
 236 /* Utility functions for the code transformation.  */
 237 static tree vect_create_destination_var (tree, tree);
 238 static tree vect_create_data_ref_ptr
 239   (tree, block_stmt_iterator *, tree, tree *, bool);
 240 static tree vect_create_index_for_vector_ref
 241   (struct loop *, block_stmt_iterator *);
 242 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
 243 static tree get_vectype_for_scalar_type (tree);
 244 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
 245 static tree vect_get_vec_def_for_operand (tree, tree);
 246 static tree vect_init_vector (tree, tree);
 247 static void vect_finish_stmt_generation
 248   (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
 249
 250 /* Utility function dealing with loop peeling (not peeling itself).  */
 251 static void vect_generate_tmps_on_preheader
 252   (loop_vec_info, tree *, tree *, tree *);
 253 static tree vect_build_loop_niters (loop_vec_info);
 254 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
 255 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
 256 static void vect_update_inits_of_dr
 257   (struct data_reference *, struct loop *, tree niters);
 258 static void vect_update_inits_of_drs (loop_vec_info, tree);
 259 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
 260 static void vect_do_peeling_for_loop_bound
 261   (loop_vec_info, tree *, struct loops *);
 262
 263 /* Utilities for creation and deletion of vec_info structs.  */
 264 loop_vec_info new_loop_vec_info (struct loop *loop);
 265 void destroy_loop_vec_info (loop_vec_info);
 266 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
 267
 268 static bool vect_debug_stats (struct loop *loop);
 269 static bool vect_debug_details (struct loop *loop);
 270
 271 \f
 272 /*************************************************************************
 273   Simple Loop Peeling Utilities
 274
 275   Utilities to support loop peeling for vectorization purposes.
 276  *************************************************************************/
 277
 278
 279 /* For each definition in DEFINITIONS this function allocates
 280    new ssa name.  */
 281
 282 static void
 283 allocate_new_names (bitmap definitions)
 284 {
 285   unsigned ver;
 286   bitmap_iterator bi;
 287
 288   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 289     {
 290       tree def = ssa_name (ver);
 291       tree *new_name_ptr = xmalloc (sizeof (tree));
 292
 293       bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
 294
 295       *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
 296       SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
 297
 298       SSA_NAME_AUX (def) = new_name_ptr;
 299     }
 300 }
 301
 302
 303 /* Renames the use *OP_P.  */
 304
 305 static void
 306 rename_use_op (use_operand_p op_p)
 307 {
 308   tree *new_name_ptr;
 309
 310   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 311     return;
 312
 313   new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
 314
 315   /* Something defined outside of the loop.  */
 316   if (!new_name_ptr)
 317     return;
 318
 319   /* An ordinary ssa name defined in the loop.  */
 320
 321   SET_USE (op_p, *new_name_ptr);
 322 }
 323
 324
 325 /* Renames the def *OP_P in statement STMT.  */
 326
 327 static void
 328 rename_def_op (def_operand_p op_p, tree stmt)
 329 {
 330   tree *new_name_ptr;
 331
 332   if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
 333     return;
 334
 335   new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
 336
 337   /* Something defined outside of the loop.  */
 338   if (!new_name_ptr)
 339     return;
 340
 341   /* An ordinary ssa name defined in the loop.  */
 342
 343   SET_DEF (op_p, *new_name_ptr);
 344   SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
 345 }
 346
 347
 348 /* Renames the variables in basic block BB.  */
 349
 350 static void
 351 rename_variables_in_bb (basic_block bb)
 352 {
 353   tree phi;
 354   block_stmt_iterator bsi;
 355   tree stmt;
 356   stmt_ann_t ann;
 357   use_optype uses;
 358   vuse_optype vuses;
 359   def_optype defs;
 360   v_may_def_optype v_may_defs;
 361   v_must_def_optype v_must_defs;
 362   unsigned i;
 363   edge e;
 364   edge_iterator ei;
 365   struct loop *loop = bb->loop_father;
 366
 367   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
 368     rename_def_op (PHI_RESULT_PTR (phi), phi);
 369
 370   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 371     {
 372       stmt = bsi_stmt (bsi);
 373       get_stmt_operands (stmt);
 374       ann = stmt_ann (stmt);
 375
 376       uses = USE_OPS (ann);
 377       for (i = 0; i < NUM_USES (uses); i++)
 378         rename_use_op (USE_OP_PTR (uses, i));
 379
 380       defs = DEF_OPS (ann);
 381       for (i = 0; i < NUM_DEFS (defs); i++)
 382         rename_def_op (DEF_OP_PTR (defs, i), stmt);
 383
 384       vuses = VUSE_OPS (ann);
 385       for (i = 0; i < NUM_VUSES (vuses); i++)
 386         rename_use_op (VUSE_OP_PTR (vuses, i));
 387
 388       v_may_defs = V_MAY_DEF_OPS (ann);
 389       for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
 390         {
 391           rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
 392           rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
 393         }
 394
 395       v_must_defs = V_MUST_DEF_OPS (ann);
 396       for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
 397         {
 398           rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
 399           rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
 400         }
 401     }
 402
 403   FOR_EACH_EDGE (e, ei, bb->succs)
 404     {
 405       if (!flow_bb_inside_loop_p (loop, e->dest))
 406         continue;
 407       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 408         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 409     }
 410 }
 411
 412
 413 /* Releases the structures holding the new ssa names.  */
 414
 415 static void
 416 free_new_names (bitmap definitions)
 417 {
 418   unsigned ver;
 419   bitmap_iterator bi;
 420
 421   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 422     {
 423       tree def = ssa_name (ver);
 424
 425       if (SSA_NAME_AUX (def))
 426         {
 427           free (SSA_NAME_AUX (def));
 428           SSA_NAME_AUX (def) = NULL;
 429         }
 430     }
 431 }
 432
 433
 434 /* Renames variables in new generated LOOP.  */
 435
 436 static void
 437 rename_variables_in_loop (struct loop *loop)
 438 {
 439   unsigned i;
 440   basic_block *bbs;
 441
 442   bbs = get_loop_body (loop);
 443
 444   for (i = 0; i < loop->num_nodes; i++)
 445     rename_variables_in_bb (bbs[i]);
 446
 447   free (bbs);
 448 }
 449
 450
 451 /* Update the PHI nodes of NEW_LOOP.
 452
 453    NEW_LOOP is a duplicate of ORIG_LOOP.
 454    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 455    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 456    executes before it.  */
 457
 458 static void
 459 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 460                                        struct loop *new_loop, bool after)
 461 {
 462   tree *new_name_ptr, new_ssa_name;
 463   tree phi_new, phi_orig;
 464   tree def;
 465   edge orig_loop_latch = loop_latch_edge (orig_loop);
 466   edge orig_entry_e = loop_preheader_edge (orig_loop);
 467   edge new_loop_exit_e = new_loop->exit_edges[0];
 468   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 469   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 470
 471   /*
 472      step 1. For each loop-header-phi:
 473              Add the first phi argument for the phi in NEW_LOOP
 474             (the one associated with the entry of NEW_LOOP)
 475
 476      step 2. For each loop-header-phi:
 477              Add the second phi argument for the phi in NEW_LOOP
 478             (the one associated with the latch of NEW_LOOP)
 479
 480      step 3. Update the phis in the successor block of NEW_LOOP.
 481
 482         case 1: NEW_LOOP was placed before ORIG_LOOP:
 483                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 484                 Updating the phis in the successor block can therefore be done
 485                 along with the scanning of the loop header phis, because the
 486                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 487                 phi nodes, organized in the same order.
 488
 489         case 2: NEW_LOOP was placed after ORIG_LOOP:
 490                 The successor block of NEW_LOOP is the original exit block of
 491                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 492                 We postpone updating these phis to a later stage (when
 493                 loop guards are added).
 494    */
 495
 496
 497   /* Scan the phis in the headers of the old and new loops
 498      (they are organized in exactly the same order).  */
 499
 500   for (phi_new = phi_nodes (new_loop->header),
 501        phi_orig = phi_nodes (orig_loop->header);
 502        phi_new && phi_orig;
 503        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 504     {
 505       /* step 1.  */
 506       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 507       add_phi_arg (phi_new, def, new_loop_entry_e);
 508
 509       /* step 2.  */
 510       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 511       if (TREE_CODE (def) != SSA_NAME)
 512         continue;
 513
 514       new_name_ptr = SSA_NAME_AUX (def);
 515       if (!new_name_ptr)
 516         /* Something defined outside of the loop.  */
 517         continue;
 518
 519       /* An ordinary ssa name defined in the loop.  */
 520       new_ssa_name = *new_name_ptr;
 521       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 522
 523       /* step 3 (case 1).  */
 524       if (!after)
 525         {
 526           gcc_assert (new_loop_exit_e == orig_entry_e);
 527           SET_PHI_ARG_DEF (phi_orig,
 528                            phi_arg_from_edge (phi_orig, new_loop_exit_e),
 529                            new_ssa_name);
 530         }
 531     }
 532 }
 533
 534
 535 /* Update PHI nodes for a guard of the LOOP.
 536
 537    Input:
 538    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 539         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 540         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 541         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 542         We denote this bb NEW_MERGE_BB because it had a single predecessor (the
 543         LOOP header) before the guard code was added, and now it became a merge
 544         point of two paths - the path that ends with the LOOP exit-edge, and
 545         the path that ends with GUARD_EDGE.
 546
 547         This function creates and updates the relevant phi nodes to account for
 548         the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
 549         1. Create phi nodes at NEW_MERGE_BB.
 550         2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 551            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 552            was added:
 553
 554         ===> The CFG before the guard-code was added:
 555         LOOP_header_bb:
 556           if (exit_loop) goto update_bb : LOOP_header_bb
 557         update_bb:
 558
 559         ==> The CFG after the guard-code was added:
 560         guard_bb:
 561           if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
 562         LOOP_header_bb:
 563           if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
 564         new_merge_bb:
 565           goto update_bb
 566         update_bb:
 567
 568    - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
 569         UPDATE_BB are loop entry phis, like the phis in the LOOP header,
 570         organized in the same order.
 571         If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
 572         loop exit phis.
 573
 574    - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
 575         "original" loop).  FALSE if LOOP is an original loop (not a newly
 576         created copy).  The SSA_NAME_AUX fields of the defs in the original
 577         loop are the corresponding new ssa-names used in the new duplicated
 578         loop copy.  IS_NEW_LOOP indicates which of the two args of the phi
 579         nodes in UPDATE_BB takes the original ssa-name, and which takes the
 580         new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
 581         the LOOP-exit-edge takes the new-name, and the phi-arg that is
 582         associated with GUARD_EDGE takes the original name.  If IS_NEW_LOOP is
 583         FALSE, it's the other way around.
 584   */
 585
 586 static void
 587 slpeel_update_phi_nodes_for_guard (edge guard_edge,
 588                                    struct loop *loop,
 589                                    bool entry_phis,
 590                                    bool is_new_loop)
 591 {
 592   tree orig_phi, new_phi, update_phi;
 593   tree guard_arg, loop_arg;
 594   basic_block new_merge_bb = guard_edge->dest;
 595   edge e = EDGE_SUCC (new_merge_bb, 0);
 596   basic_block update_bb = e->dest;
 597   basic_block orig_bb = (entry_phis ? loop->header : update_bb);
 598
 599   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 600        orig_phi && update_phi;
 601        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 602     {
 603       /* 1. Generate new phi node in NEW_MERGE_BB:  */
 604       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 605                                  new_merge_bb);
 606
 607       /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 608             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 609       if (entry_phis)
 610         {
 611           loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
 612                                             EDGE_SUCC (loop->latch, 0));
 613           guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
 614         }
 615       else /* exit phis */
 616         {
 617           tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 618           tree *new_name_ptr = SSA_NAME_AUX (orig_def);
 619           tree new_name;
 620
 621           if (new_name_ptr)
 622             new_name = *new_name_ptr;
 623           else
 624             /* Something defined outside of the loop  */
 625             new_name = orig_def;
 626
 627           if (is_new_loop)
 628             {
 629               guard_arg = orig_def;
 630               loop_arg = new_name;
 631             }
 632           else
 633             {
 634               guard_arg = new_name;
 635               loop_arg = orig_def;
 636             }
 637         }
 638       add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
 639       add_phi_arg (new_phi, guard_arg, guard_edge);
 640
 641       /* 3. Update phi in successor block.  */
 642       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 643                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 644       SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
 645                        PHI_RESULT (new_phi));
 646     }
 647
 648   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 649 }
 650
 651
 652 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 653    that starts at zero, increases by one and its limit is NITERS.
 654
 655    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 656
 657 static void
 658 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 659 {
 660   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 661   tree orig_cond;
 662   edge exit_edge = loop->exit_edges[0];
 663   block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
 664   tree begin_label = tree_block_label (loop->latch);
 665   tree exit_label = tree_block_label (loop->single_exit->dest);
 666   tree init = build_int_cst (TREE_TYPE (niters), 0);
 667   tree step = build_int_cst (TREE_TYPE (niters), 1);
 668   tree then_label;
 669   tree else_label;
 670
 671   orig_cond = get_loop_exit_condition (loop);
 672   gcc_assert (orig_cond);
 673   create_iv (init, step, NULL_TREE, loop,
 674              &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
 675
 676   /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
 677      back to the exit condition statement.  */
 678   bsi_next (&loop_exit_bsi);
 679   gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
 680
 681   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 682     {
 683       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 684       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 685       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 686     }
 687   else /* 'then' edge loops back.  */
 688     {
 689       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 690       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 691       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 692     }
 693
 694   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 695                      then_label, else_label);
 696   bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
 697
 698   /* Remove old loop exit test:  */
 699   bsi_remove (&loop_exit_bsi);
 700
 701   if (vect_debug_stats (loop) || vect_debug_details (loop))
 702     print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 703
 704   loop->nb_iterations = niters;
 705 }
 706
 707
 708 /* Given LOOP this function generates a new copy of it and puts it
 709    on E which is either the entry or exit of LOOP.  */
 710
 711 static struct loop *
 712 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 713                                         edge e)
 714 {
 715   struct loop *new_loop;
 716   basic_block *new_bbs, *bbs;
 717   bool at_exit;
 718   bool was_imm_dom;
 719   basic_block exit_dest;
 720   tree phi, phi_arg;
 721
 722   at_exit = (e == loop->exit_edges[0]);
 723   if (!at_exit && e != loop_preheader_edge (loop))
 724     {
 725       if (dump_file && (dump_flags & TDF_DETAILS))
 726           fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
 727       return NULL;
 728     }
 729
 730   bbs = get_loop_body (loop);
 731
 732   /* Check whether duplication is possible.  */
 733   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 734     {
 735       if (vect_debug_stats (loop) || vect_debug_details (loop))
 736           fprintf (dump_file, "Cannot copy basic blocks.\n");
 737       free (bbs);
 738       return NULL;
 739     }
 740
 741   /* Generate new loop structure.  */
 742   new_loop = duplicate_loop (loops, loop, loop->outer);
 743   if (!new_loop)
 744     {
 745       if (vect_debug_stats (loop) || vect_debug_details (loop))
 746           fprintf (dump_file, "duplicate_loop returns NULL.\n");
 747       free (bbs);
 748       return NULL;
 749     }
 750
 751   exit_dest = loop->exit_edges[0]->dest;
 752   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 753                                           exit_dest) == loop->header ?
 754                  true : false);
 755
 756   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 757
 758   copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
 759
 760   /* Duplicating phi args at exit bbs as coming
 761      also from exit of duplicated loop.  */
 762   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 763     {
 764       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
 765       if (phi_arg)
 766         {
 767           edge new_loop_exit_edge;
 768
 769           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 770             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 771           else
 772             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 773
 774           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 775         }
 776     }
 777
 778   if (at_exit) /* Add the loop copy at exit.  */
 779     {
 780       redirect_edge_and_branch_force (e, new_loop->header);
 781       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 782       if (was_imm_dom)
 783         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 784     }
 785   else /* Add the copy at entry.  */
 786     {
 787       edge new_exit_e;
 788       edge entry_e = loop_preheader_edge (loop);
 789       basic_block preheader = entry_e->src;
 790
 791       if (!flow_bb_inside_loop_p (new_loop,
 792                                   EDGE_SUCC (new_loop->header, 0)->dest))
 793         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 794       else
 795         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 796
 797       redirect_edge_and_branch_force (new_exit_e, loop->header);
 798       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 799                                new_exit_e->src);
 800
 801       /* We have to add phi args to the loop->header here as coming
 802          from new_exit_e edge.  */
 803       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 804         {
 805           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 806           if (phi_arg)
 807             add_phi_arg (phi, phi_arg, new_exit_e);
 808         }
 809
 810       redirect_edge_and_branch_force (entry_e, new_loop->header);
 811       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 812     }
 813
 814   flow_loop_scan (new_loop, LOOP_ALL);
 815   flow_loop_scan (loop, LOOP_ALL);
 816   free (new_bbs);
 817   free (bbs);
 818
 819   return new_loop;
 820 }
 821
 822
 823 /* Given the condition statement COND, put it as the last statement
 824    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 825    Assumes that this is the single exit of the guarded loop.
 826    Returns the skip edge.  */
 827
 828 static edge
 829 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 830                         basic_block dom_bb)
 831 {
 832   block_stmt_iterator bsi;
 833   edge new_e, enter_e;
 834   tree cond_stmt, then_label, else_label;
 835
 836   enter_e = EDGE_SUCC (guard_bb, 0);
 837   enter_e->flags &= ~EDGE_FALLTHRU;
 838   enter_e->flags |= EDGE_FALSE_VALUE;
 839   bsi = bsi_last (guard_bb);
 840
 841   then_label = build1 (GOTO_EXPR, void_type_node,
 842                        tree_block_label (exit_bb));
 843   else_label = build1 (GOTO_EXPR, void_type_node,
 844                        tree_block_label (enter_e->dest));
 845   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 846                      then_label, else_label);
 847   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 848   /* Add new edge to connect entry block to the second loop.  */
 849   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 850   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 851   return new_e;
 852 }
 853
 854
 855 /* This function verifies that the following restrictions apply to LOOP:
 856    (1) it is innermost
 857    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 858    (3) it is single entry, single exit
 859    (4) its exit condition is the last stmt in the header
 860    (5) E is the entry/exit edge of LOOP.
 861  */
 862
 863 static bool
 864 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 865 {
 866   edge exit_e = loop->exit_edges [0];
 867   edge entry_e = loop_preheader_edge (loop);
 868   tree orig_cond = get_loop_exit_condition (loop);
 869   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 870
 871   if (any_marked_for_rewrite_p ())
 872     return false;
 873
 874   if (loop->inner
 875       /* All loops have an outer scope; the only case loop->outer is NULL is for
 876          the function itself.  */
 877       || !loop->outer
 878       || loop->num_nodes != 2
 879       || !empty_block_p (loop->latch)
 880       || loop->num_exits != 1
 881       || loop->num_entries != 1
 882       /* Verify that new loop exit condition can be trivially modified.  */
 883       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 884       || (e != exit_e && e != entry_e))
 885     return false;
 886
 887   return true;
 888 }
 889
 890 #ifdef ENABLE_CHECKING
 891 static void
 892 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 893                                  struct loop *second_loop)
 894 {
 895   basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
 896   basic_block loop2_entry_bb = second_loop->pre_header;
 897   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 898
 899   /* A guard that controls whether the second_loop is to be executed or skipped
 900      is placed in first_loop->exit.  first_loopt->exit therefore has two
 901      successors - one is the preheader of second_loop, and the other is a bb
 902      after second_loop.
 903    */
 904   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 905
 906
 907   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
 908         of second_loop.  */
 909
 910   /* The preheader of new_loop is expected to have two predessors:
 911      first_loop->exit and the block that precedes first_loop.  */
 912
 913   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 914               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 915                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 916                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 917                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 918
 919   /* Verify that the other successor of first_loopt->exit is after the
 920      second_loop.  */
 921   /* TODO */
 922 }
 923 #endif
 924
 925 /* Function slpeel_tree_peel_loop_to_edge.
 926
 927    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 928    that is placed on the entry (exit) edge E of LOOP. After this transformation
 929    we have two loops one after the other - first-loop iterates FIRST_NITERS
 930    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 931
 932    Input:
 933    - LOOP: the loop to be peeled.
 934    - E: the exit or entry edge of LOOP.
 935         If it is the entry edge, we peel the first iterations of LOOP. In this
 936         case first-loop is LOOP, and second-loop is the newly created loop.
 937         If it is the exit edge, we peel the last iterations of LOOP. In this
 938         case, first-loop is the newly created loop, and second-loop is LOOP.
 939    - NITERS: the number of iterations that LOOP iterates.
 940    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
 941    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
 942         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
 943         is false, the caller of this function may want to take care of this
 944         (this can be useful if we don't want new stmts added to first-loop).
 945
 946    Output:
 947    The function returns a pointer to the new loop-copy, or NULL if it failed
 948    to perform the transformation.
 949
 950    The function generates two if-then-else guards: one before the first loop,
 951    and the other before the second loop:
 952    The first guard is:
 953      if (FIRST_NITERS == 0) then skip the first loop,
 954      and go directly to the second loop.
 955    The second guard is:
 956      if (FIRST_NITERS == NITERS) then skip the second loop.
 957
 958    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
 959    FORNOW the resulting code will not be in loop-closed-ssa form.
 960 */
 961
 962 struct loop*
 963 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
 964                                edge e, tree first_niters,
 965                                tree niters, bool update_first_loop_count)
 966 {
 967   struct loop *new_loop = NULL, *first_loop, *second_loop;
 968   edge skip_e;
 969   tree pre_condition;
 970   bitmap definitions;
 971   basic_block bb_before_second_loop, bb_after_second_loop;
 972   basic_block bb_before_first_loop;
 973   basic_block bb_between_loops;
 974   edge exit_e = loop->exit_edges [0];
 975
 976   if (!slpeel_can_duplicate_loop_p (loop, e))
 977     return NULL;
 978
 979   /* We have to initialize cfg_hooks. Then, when calling
 980    cfg_hooks->split_edge, the function tree_split_edge
 981    is actually called and, when calling cfg_hooks->duplicate_block,
 982    the function tree_duplicate_bb is called.  */
 983   tree_register_cfg_hooks ();
 984
 985
 986   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
 987         Resulting CFG would be:
 988
 989         first_loop:
 990         do {
 991         } while ...
 992
 993         second_loop:
 994         do {
 995         } while ...
 996
 997         orig_exit_bb:
 998    */
 999
1000   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1001     {
1002       if (vect_debug_stats (loop) || vect_debug_details (loop))
1003         fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1004       return NULL;
1005     }
1006
1007   if (e == exit_e)
1008     {
1009       /* NEW_LOOP was placed after LOOP.  */
1010       first_loop = loop;
1011       second_loop = new_loop;
1012     }
1013   else
1014     {
1015       /* NEW_LOOP was placed before LOOP.  */
1016       first_loop = new_loop;
1017       second_loop = loop;
1018     }
1019
1020   definitions = marked_ssa_names ();
1021   allocate_new_names (definitions);
1022   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1023   rename_variables_in_loop (new_loop);
1024
1025
1026   /* 2. Add the guard that controls whether the first loop is executed.
1027         Resulting CFG would be:
1028
1029         bb_before_first_loop:
1030         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1031                                GOTO first-loop
1032
1033         first_loop:
1034         do {
1035         } while ...
1036
1037         bb_before_second_loop:
1038
1039         second_loop:
1040         do {
1041         } while ...
1042
1043         orig_exit_bb:
1044    */
1045
1046   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1047   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1048   bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1049   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1050   flow_loop_scan (first_loop, LOOP_ALL);
1051   flow_loop_scan (second_loop, LOOP_ALL);
1052
1053   pre_condition =
1054         build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1055   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1056                                   bb_before_second_loop, bb_before_first_loop);
1057   slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1058                                      first_loop == new_loop);
1059
1060
1061   /* 3. Add the guard that controls whether the second loop is executed.
1062         Resulting CFG would be:
1063
1064         bb_before_first_loop:
1065         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1066                                GOTO first-loop
1067
1068         first_loop:
1069         do {
1070         } while ...
1071
1072         bb_between_loops:
1073         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1074                                     GOTO bb_before_second_loop
1075
1076         bb_before_second_loop:
1077
1078         second_loop:
1079         do {
1080         } while ...
1081
1082         bb_after_second_loop:
1083
1084         orig_exit_bb:
1085    */
1086
1087   bb_between_loops = split_edge (first_loop->exit_edges[0]);
1088   add_bb_to_loop (bb_between_loops, first_loop->outer);
1089   bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1090   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1091   flow_loop_scan (first_loop, LOOP_ALL);
1092   flow_loop_scan (second_loop, LOOP_ALL);
1093
1094   pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1095   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1096                                   bb_after_second_loop, bb_before_first_loop);
1097   slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1098                                      second_loop == new_loop);
1099
1100   /* Flow loop scan does not update loop->single_exit field.  */
1101   first_loop->single_exit = first_loop->exit_edges[0];
1102   second_loop->single_exit = second_loop->exit_edges[0];
1103
1104   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1105    */
1106   if (update_first_loop_count)
1107     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1108
1109   free_new_names (definitions);
1110   BITMAP_XFREE (definitions);
1111   unmark_all_for_rewrite ();
1112
1113   return new_loop;
1114 }
1115
1116 \f
1117 /* Here the proper Vectorizer starts.  */
1118
1119 /*************************************************************************
1120   Vectorization Utilities.
1121  *************************************************************************/
1122
1123 /* Function new_stmt_vec_info.
1124
1125    Create and initialize a new stmt_vec_info struct for STMT.  */
1126
1127 stmt_vec_info
1128 new_stmt_vec_info (tree stmt, struct loop *loop)
1129 {
1130   stmt_vec_info res;
1131   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1132
1133   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1134   STMT_VINFO_STMT (res) = stmt;
1135   STMT_VINFO_LOOP (res) = loop;
1136   STMT_VINFO_RELEVANT_P (res) = 0;
1137   STMT_VINFO_VECTYPE (res) = NULL;
1138   STMT_VINFO_VEC_STMT (res) = NULL;
1139   STMT_VINFO_DATA_REF (res) = NULL;
1140   STMT_VINFO_MEMTAG (res) = NULL;
1141   STMT_VINFO_VECT_DR_BASE (res) = NULL;
1142
1143   return res;
1144 }
1145
1146
1147 /* Function new_loop_vec_info.
1148
1149    Create and initialize a new loop_vec_info struct for LOOP, as well as
1150    stmt_vec_info structs for all the stmts in LOOP.  */
1151
1152 loop_vec_info
1153 new_loop_vec_info (struct loop *loop)
1154 {
1155   loop_vec_info res;
1156   basic_block *bbs;
1157   block_stmt_iterator si;
1158   unsigned int i;
1159
1160   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1161
1162   bbs = get_loop_body (loop);
1163
1164   /* Create stmt_info for all stmts in the loop.  */
1165   for (i = 0; i < loop->num_nodes; i++)
1166     {
1167       basic_block bb = bbs[i];
1168       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1169         {
1170           tree stmt = bsi_stmt (si);
1171           stmt_ann_t ann;
1172
1173           get_stmt_operands (stmt);
1174           ann = stmt_ann (stmt);
1175           set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1176         }
1177     }
1178
1179   LOOP_VINFO_LOOP (res) = loop;
1180   LOOP_VINFO_BBS (res) = bbs;
1181   LOOP_VINFO_EXIT_COND (res) = NULL;
1182   LOOP_VINFO_NITERS (res) = NULL;
1183   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1184   LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1185   LOOP_VINFO_VECT_FACTOR (res) = 0;
1186   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1187                            "loop_write_datarefs");
1188   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1189                            "loop_read_datarefs");
1190   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1191
1192   return res;
1193 }
1194
1195
1196 /* Function destroy_loop_vec_info.
1197
1198    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1199    stmts in the loop.  */
1200
1201 void
1202 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1203 {
1204   struct loop *loop;
1205   basic_block *bbs;
1206   int nbbs;
1207   block_stmt_iterator si;
1208   int j;
1209
1210   if (!loop_vinfo)
1211     return;
1212
1213   loop = LOOP_VINFO_LOOP (loop_vinfo);
1214
1215   bbs = LOOP_VINFO_BBS (loop_vinfo);
1216   nbbs = loop->num_nodes;
1217
1218   for (j = 0; j < nbbs; j++)
1219     {
1220       basic_block bb = bbs[j];
1221       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1222         {
1223           tree stmt = bsi_stmt (si);
1224           stmt_ann_t ann = stmt_ann (stmt);
1225           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1226           free (stmt_info);
1227           set_stmt_info (ann, NULL);
1228         }
1229     }
1230
1231   free (LOOP_VINFO_BBS (loop_vinfo));
1232   varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1233   varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1234
1235   free (loop_vinfo);
1236 }
1237
1238
1239 /* Function debug_loop_stats.
1240
1241    For vectorization statistics dumps.  */
1242
1243 static bool
1244 vect_debug_stats (struct loop *loop)
1245 {
1246   basic_block bb;
1247   block_stmt_iterator si;
1248   tree node = NULL_TREE;
1249
1250   if (!dump_file || !(dump_flags & TDF_STATS))
1251     return false;
1252
1253   if (!loop)
1254     {
1255       fprintf (dump_file, "\n");
1256       return true;
1257     }
1258
1259   if (!loop->header)
1260     return false;
1261
1262   bb = loop->header;
1263
1264   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1265     {
1266       node = bsi_stmt (si);
1267       if (node && EXPR_P (node) && EXPR_LOCUS (node))
1268         break;
1269     }
1270
1271   if (node && EXPR_P (node) && EXPR_LOCUS (node)
1272       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1273     {
1274       fprintf (dump_file, "\nloop at %s:%d: ",
1275         EXPR_FILENAME (node), EXPR_LINENO (node));
1276       return true;
1277     }
1278
1279   return false;
1280 }
1281
1282
1283 /* Function debug_loop_details.
1284
1285    For vectorization debug dumps.  */
1286
1287 static bool
1288 vect_debug_details (struct loop *loop)
1289 {
1290    basic_block bb;
1291    block_stmt_iterator si;
1292    tree node = NULL_TREE;
1293
1294   if (!dump_file || !(dump_flags & TDF_DETAILS))
1295     return false;
1296
1297   if (!loop)
1298     {
1299       fprintf (dump_file, "\n");
1300       return true;
1301     }
1302
1303   if (!loop->header)
1304     return false;
1305
1306   bb = loop->header;
1307
1308   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1309     {
1310       node = bsi_stmt (si);
1311       if (node && EXPR_P (node) && EXPR_LOCUS (node))
1312         break;
1313     }
1314
1315   if (node && EXPR_P (node) && EXPR_LOCUS (node)
1316       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1317     {
1318       fprintf (dump_file, "\nloop at %s:%d: ",
1319                EXPR_FILENAME (node), EXPR_LINENO (node));
1320       return true;
1321     }
1322
1323   return false;
1324 }
1325
1326
1327 /* Function vect_get_ptr_offset
1328
1329    Compute the OFFSET modulo vector-type alignment of pointer REF in bits.  */
1330
1331 static tree
1332 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1333                      tree vectype ATTRIBUTE_UNUSED,
1334                      tree *offset ATTRIBUTE_UNUSED)
1335 {
1336   /* TODO: Use alignment information.  */
1337   return NULL_TREE;
1338 }
1339
1340
1341 /* Function vect_get_base_and_bit_offset
1342
1343    Return the BASE of the data reference EXPR.
1344    If VECTYPE is given, also compute the OFFSET from BASE in bits.
1345    E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1346    bits of 'a.b[i] + 4B' from a.
1347
1348    Input:
1349    EXPR - the memory reference that is being analyzed
1350    DR - the data_reference struct of the _original_ memory reference
1351         (Note: DR_REF (DR) is not necessarily EXPR)
1352    VECTYPE - the type that defines the alignment (i.e, we compute
1353              alignment relative to TYPE_ALIGN(VECTYPE))
1354
1355    Output:
1356    BASE (returned value) - the base of the data reference EXPR.
1357                            E.g, if EXPR is a.b[k].c[i][j] the returned
1358                            base is a.
1359    OFFSET - offset of EXPR from BASE in bits
1360    BASE_ALIGNED_P - indicates if BASE is aligned
1361
1362    If something unexpected is encountered (an unsupported form of data-ref),
1363    or if VECTYPE is given but OFFSET cannot be determined:
1364    then NULL_TREE is returned.  */
1365
1366 static tree
1367 vect_get_base_and_bit_offset (struct data_reference *dr,
1368                               tree expr,
1369                               tree vectype,
1370                               loop_vec_info loop_vinfo,
1371                               tree *offset,
1372                               bool *base_aligned_p)
1373 {
1374   tree this_offset = size_zero_node;
1375   tree base = NULL_TREE;
1376   tree next_ref;
1377   tree oprnd0, oprnd1;
1378   struct data_reference *array_dr;
1379   enum tree_code code = TREE_CODE (expr);
1380
1381   *base_aligned_p = false;
1382
1383   switch (code)
1384     {
1385     /* These cases end the recursion:  */
1386     case VAR_DECL:
1387       *offset = size_zero_node;
1388       if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1389         *base_aligned_p = true;
1390       return expr;
1391
1392     case SSA_NAME:
1393       if (!vectype)
1394         return expr;
1395
1396       if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1397         return NULL_TREE;
1398
1399       if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1400         {
1401           base = vect_get_ptr_offset (expr, vectype, offset);
1402           if (base)
1403             *base_aligned_p = true;
1404         }
1405       else
1406         {
1407           *base_aligned_p = true;
1408           *offset = size_zero_node;
1409           base = expr;
1410         }
1411       return base;
1412
1413     case INTEGER_CST:
1414       *offset = int_const_binop (MULT_EXPR, expr,
1415                                  build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1416       return expr;
1417
1418     /* These cases continue the recursion:  */
1419     case COMPONENT_REF:
1420       oprnd0 = TREE_OPERAND (expr, 0);
1421       oprnd1 = TREE_OPERAND (expr, 1);
1422
1423       this_offset = bit_position (oprnd1);
1424       if (vectype && !host_integerp (this_offset, 1))
1425         return NULL_TREE;
1426       next_ref = oprnd0;
1427       break;
1428
1429     case ADDR_EXPR:
1430       oprnd0 = TREE_OPERAND (expr, 0);
1431       next_ref = oprnd0;
1432       break;
1433
1434     case INDIRECT_REF:
1435       oprnd0 = TREE_OPERAND (expr, 0);
1436       next_ref = oprnd0;
1437       break;
1438
1439     case ARRAY_REF:
1440       if (DR_REF (dr) != expr)
1441         /* Build array data_reference struct if the existing DR_REF
1442            doesn't match EXPR. This happens, for example, when the
1443            EXPR is *T and T is initialized to &arr[indx]. The DR struct
1444            contains information on the access of T, not of arr. In order
1445            to continue  the analysis, we create a new DR struct that
1446            describes the access of arr.
1447         */
1448         array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1449       else
1450         array_dr = dr;
1451
1452       next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1453                                                    vectype, &this_offset);
1454       if (!next_ref)
1455         return NULL_TREE;
1456
1457       if (vectype &&
1458           TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1459         {
1460           *offset = this_offset;
1461           *base_aligned_p = true;
1462           return next_ref;
1463         }
1464       break;
1465
1466     case PLUS_EXPR:
1467     case MINUS_EXPR:
1468       /* In case we have a PLUS_EXPR of the form
1469          (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1470          This is verified in  vect_get_symbl_and_dr.  */
1471       oprnd0 = TREE_OPERAND (expr, 0);
1472       oprnd1 = TREE_OPERAND (expr, 1);
1473
1474       base = vect_get_base_and_bit_offset
1475         (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1476       if (vectype && !base)
1477         return NULL_TREE;
1478
1479       next_ref = oprnd0;
1480       break;
1481
1482     default:
1483       return NULL_TREE;
1484     }
1485
1486   base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1487                                        loop_vinfo, offset, base_aligned_p);
1488
1489   if (vectype && base)
1490     {
1491       *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1492       if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1493         return NULL_TREE;
1494
1495       if (vect_debug_details (NULL))
1496         {
1497           print_generic_expr (dump_file, expr, TDF_SLIM);
1498           fprintf (dump_file, " --> total offset for ref: ");
1499           print_generic_expr (dump_file, *offset, TDF_SLIM);
1500         }
1501     }
1502   return base;
1503 }
1504
1505
1506 /* Function vect_force_dr_alignment_p.
1507
1508    Returns whether the alignment of a DECL can be forced to be aligned
1509    on ALIGNMENT bit boundary.  */
1510
1511 static bool
1512 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1513 {
1514   if (TREE_CODE (decl) != VAR_DECL)
1515     return false;
1516
1517   if (DECL_EXTERNAL (decl))
1518     return false;
1519
1520   if (TREE_ASM_WRITTEN (decl))
1521     return false;
1522
1523   if (TREE_STATIC (decl))
1524     return (alignment <= MAX_OFILE_ALIGNMENT);
1525   else
1526     /* This is not 100% correct.  The absolute correct stack alignment
1527        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1528        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1529        However, until someone implements forced stack alignment, SSE
1530        isn't really usable without this.  */
1531     return (alignment <= PREFERRED_STACK_BOUNDARY);
1532 }
1533
1534
1535 /* Function vect_get_new_vect_var.
1536
1537    Returns a name for a new variable. The current naming scheme appends the
1538    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1539    the name of vectorizer generated variables, and appends that to NAME if
1540    provided.  */
1541
1542 static tree
1543 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1544 {
1545   const char *prefix;
1546   int prefix_len;
1547   tree new_vect_var;
1548
1549   if (var_kind == vect_simple_var)
1550     prefix = "vect_";
1551   else
1552     prefix = "vect_p";
1553
1554   prefix_len = strlen (prefix);
1555
1556   if (name)
1557     new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1558   else
1559     new_vect_var = create_tmp_var (type, prefix);
1560
1561   return new_vect_var;
1562 }
1563
1564
1565 /* Function vect_create_index_for_vector_ref.
1566
1567    Create (and return) an index variable, along with it's update chain in the
1568    loop. This variable will be used to access a memory location in a vector
1569    operation.
1570
1571    Input:
1572    LOOP: The loop being vectorized.
1573    BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1574         function can be added here, or in the loop pre-header.
1575
1576    Output:
1577    Return an index that will be used to index a vector array.  It is expected
1578    that a pointer to the first vector will be used as the base address for the
1579    indexed reference.
1580
1581    FORNOW: we are not trying to be efficient, just creating a new index each
1582    time from scratch.  At this time all vector references could use the same
1583    index.
1584
1585    TODO: create only one index to be used by all vector references.  Record
1586    the index in the LOOP_VINFO the first time this procedure is called and
1587    return it on subsequent calls.  The increment of this index must be placed
1588    just before the conditional expression that ends the single block loop.  */
1589
1590 static tree
1591 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1592 {
1593   tree init, step;
1594   tree indx_before_incr, indx_after_incr;
1595
1596   /* It is assumed that the base pointer used for vectorized access contains
1597      the address of the first vector.  Therefore the index used for vectorized
1598      access must be initialized to zero and incremented by 1.  */
1599
1600   init = integer_zero_node;
1601   step = integer_one_node;
1602
1603   /* Assuming that bsi_insert is used with BSI_NEW_STMT  */
1604   create_iv (init, step, NULL_TREE, loop, bsi, false,
1605         &indx_before_incr, &indx_after_incr);
1606
1607   return indx_before_incr;
1608 }
1609
1610
1611 /* Function vect_create_addr_base_for_vector_ref.
1612
1613    Create an expression that computes the address of the first memory location
1614    that will be accessed for a data reference.
1615
1616    Input:
1617    STMT: The statement containing the data reference.
1618    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1619    OFFSET: Optional. If supplied, it is be added to the initial address.
1620
1621    Output:
1622    1. Return an SSA_NAME whose value is the address of the memory location of
1623       the first vector of the data reference.
1624    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1625       these statement(s) which define the returned SSA_NAME.
1626
1627    FORNOW: We are only handling array accesses with step 1.  */
1628
1629 static tree
1630 vect_create_addr_base_for_vector_ref (tree stmt,
1631                                       tree *new_stmt_list,
1632                                       tree offset)
1633 {
1634   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1635   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1636   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1637   tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1638   tree base_name = unshare_expr (DR_BASE_NAME (dr));
1639   tree ref = DR_REF (dr);
1640   tree data_ref_base_type = TREE_TYPE (data_ref_base);
1641   tree scalar_type = TREE_TYPE (ref);
1642   tree scalar_ptr_type = build_pointer_type (scalar_type);
1643   tree access_fn;
1644   tree init_val, step, init_oval;
1645   bool ok;
1646   bool is_ptr_ref, is_array_ref, is_addr_expr;
1647   tree array_base;
1648   tree vec_stmt;
1649   tree new_temp;
1650   tree array_ref;
1651   tree addr_base, addr_expr;
1652   tree dest, new_stmt;
1653
1654   /* Only the access function of the last index is relevant (i_n in
1655      a[i_1][i_2]...[i_n]), the others correspond to loop invariants.  */
1656   access_fn = DR_ACCESS_FN (dr, 0);
1657   ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1658                                     true);
1659   if (!ok)
1660     init_oval = integer_zero_node;
1661
1662   is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1663                && TREE_CODE (data_ref_base) == SSA_NAME;
1664   is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1665   is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1666                  || TREE_CODE (data_ref_base) == PLUS_EXPR
1667                  || TREE_CODE (data_ref_base) == MINUS_EXPR;
1668   gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1669
1670   /** Create: &(base[init_val])
1671
1672       if data_ref_base is an ARRAY_TYPE:
1673          base = data_ref_base
1674
1675       if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1676          base = *((scalar_array *) data_ref_base)
1677    **/
1678
1679   if (is_array_ref)
1680     array_base = data_ref_base;
1681   else /* is_ptr_ref  or is_addr_expr */
1682     {
1683       /* array_ptr = (scalar_array_ptr_type *) data_ref_base;  */
1684       tree scalar_array_type = build_array_type (scalar_type, 0);
1685       tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1686       tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1687       add_referenced_tmp_var (array_ptr);
1688
1689       dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1690       add_referenced_tmp_var (dest);
1691       data_ref_base =
1692         force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1693       append_to_statement_list_force (new_stmt, new_stmt_list);
1694
1695       vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1696       vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1697       new_temp = make_ssa_name (array_ptr, vec_stmt);
1698       TREE_OPERAND (vec_stmt, 0) = new_temp;
1699       append_to_statement_list_force (vec_stmt, new_stmt_list);
1700
1701       /* (*array_ptr)  */
1702       array_base = build_fold_indirect_ref (new_temp);
1703     }
1704
1705   dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1706   add_referenced_tmp_var (dest);
1707   init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1708   append_to_statement_list_force (new_stmt, new_stmt_list);
1709
1710   if (offset)
1711     {
1712       tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1713       add_referenced_tmp_var (tmp);
1714       vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1715       vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1716       init_val = make_ssa_name (tmp, vec_stmt);
1717       TREE_OPERAND (vec_stmt, 0) = init_val;
1718       append_to_statement_list_force (vec_stmt, new_stmt_list);
1719     }
1720
1721   array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1722                       NULL_TREE, NULL_TREE);
1723   addr_base = build_fold_addr_expr (array_ref);
1724
1725   /* addr_expr = addr_base */
1726   addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1727                                      get_name (base_name));
1728   add_referenced_tmp_var (addr_expr);
1729   vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1730   new_temp = make_ssa_name (addr_expr, vec_stmt);
1731   TREE_OPERAND (vec_stmt, 0) = new_temp;
1732   append_to_statement_list_force (vec_stmt, new_stmt_list);
1733
1734   return new_temp;
1735 }
1736
1737
1738 /* Function get_vectype_for_scalar_type.
1739
1740    Returns the vector type corresponding to SCALAR_TYPE as supported
1741    by the target.  */
1742
1743 static tree
1744 get_vectype_for_scalar_type (tree scalar_type)
1745 {
1746   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1747   int nbytes = GET_MODE_SIZE (inner_mode);
1748   int nunits;
1749   tree vectype;
1750
1751   if (nbytes == 0)
1752     return NULL_TREE;
1753
1754   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1755      is expected.  */
1756   nunits = UNITS_PER_SIMD_WORD / nbytes;
1757
1758   vectype = build_vector_type (scalar_type, nunits);
1759   if (vect_debug_details (NULL))
1760     {
1761       fprintf (dump_file, "get vectype with %d units of type ", nunits);
1762       print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1763     }
1764
1765   if (!vectype)
1766     return NULL_TREE;
1767
1768   if (vect_debug_details (NULL))
1769     {
1770       fprintf (dump_file, "vectype: ");
1771       print_generic_expr (dump_file, vectype, TDF_SLIM);
1772     }
1773
1774   if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1775     {
1776       /* TODO: tree-complex.c sometimes can parallelize operations
1777          on generic vectors.  We can vectorize the loop in that case,
1778          but then we should re-run the lowering pass.  */
1779       if (vect_debug_details (NULL))
1780         fprintf (dump_file, "mode not supported by target.");
1781       return NULL_TREE;
1782     }
1783
1784   return vectype;
1785 }
1786
1787
1788 /* Function vect_align_data_ref.
1789
1790    Handle mislignment of a memory accesses.
1791
1792    FORNOW: Can't handle misaligned accesses.
1793    Make sure that the dataref is aligned.  */
1794
1795 static void
1796 vect_align_data_ref (tree stmt)
1797 {
1798   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1799   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1800
1801   /* FORNOW: can't handle misaligned accesses;
1802              all accesses expected to be aligned.  */
1803   gcc_assert (aligned_access_p (dr));
1804 }
1805
1806
1807 /* Function vect_create_data_ref_ptr.
1808
1809    Create a memory reference expression for vector access, to be used in a
1810    vector load/store stmt. The reference is based on a new pointer to vector
1811    type (vp).
1812
1813    Input:
1814    1. STMT: a stmt that references memory. Expected to be of the form
1815          MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1816    2. BSI: block_stmt_iterator where new stmts can be added.
1817    3. OFFSET (optional): an offset to be added to the initial address accessed
1818         by the data-ref in STMT.
1819    4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1820         pointing to the initial address.
1821
1822    Output:
1823    1. Declare a new ptr to vector_type, and have it point to the base of the
1824       data reference (initial addressed accessed by the data reference).
1825       For example, for vector of type V8HI, the following code is generated:
1826
1827       v8hi *vp;
1828       vp = (v8hi *)initial_address;
1829
1830       if OFFSET is not supplied:
1831          initial_address = &a[init];
1832       if OFFSET is supplied:
1833          initial_address = &a[init + OFFSET];
1834
1835       Return the initial_address in INITIAL_ADDRESS.
1836
1837    2. Create a data-reference in the loop based on the new vector pointer vp,
1838       and using a new index variable 'idx' as follows:
1839
1840       vp' = vp + update
1841
1842       where if ONLY_INIT is true:
1843          update = zero
1844       and otherwise
1845          update = idx + vector_type_size
1846
1847       Return the pointer vp'.
1848
1849
1850    FORNOW: handle only aligned and consecutive accesses.  */
1851
1852 static tree
1853 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1854                           tree *initial_address, bool only_init)
1855 {
1856   tree base_name;
1857   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1858   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1859   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1860   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1861   tree vect_ptr_type;
1862   tree vect_ptr;
1863   tree tag;
1864   v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1865   v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1866   vuse_optype vuses = STMT_VUSE_OPS (stmt);
1867   int nvuses, nv_may_defs, nv_must_defs;
1868   int i;
1869   tree new_temp;
1870   tree vec_stmt;
1871   tree new_stmt_list = NULL_TREE;
1872   tree idx;
1873   edge pe = loop_preheader_edge (loop);
1874   basic_block new_bb;
1875   tree vect_ptr_init;
1876   tree vectype_size;
1877   tree ptr_update;
1878   tree data_ref_ptr;
1879   tree type, tmp, size;
1880
1881   base_name = unshare_expr (DR_BASE_NAME (dr));
1882   if (vect_debug_details (NULL))
1883     {
1884       tree data_ref_base = base_name;
1885       fprintf (dump_file, "create array_ref of type: ");
1886       print_generic_expr (dump_file, vectype, TDF_SLIM);
1887       if (TREE_CODE (data_ref_base) == VAR_DECL)
1888         fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1889       else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1890         fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1891       else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1892         fprintf (dump_file, "vectorizing a record based array ref: ");
1893       else if (TREE_CODE (data_ref_base) == SSA_NAME)
1894         fprintf (dump_file, "vectorizing a pointer ref: ");
1895       print_generic_expr (dump_file, base_name, TDF_SLIM);
1896     }
1897
1898   /** (1) Create the new vector-pointer variable:  **/
1899
1900   vect_ptr_type = build_pointer_type (vectype);
1901   vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1902                                     get_name (base_name));
1903   add_referenced_tmp_var (vect_ptr);
1904
1905
1906   /** (2) Handle aliasing information of the new vector-pointer:  **/
1907
1908   tag = STMT_VINFO_MEMTAG (stmt_info);
1909   gcc_assert (tag);
1910   get_var_ann (vect_ptr)->type_mem_tag = tag;
1911
1912   /* Mark for renaming all aliased variables
1913      (i.e, the may-aliases of the type-mem-tag).  */
1914   nvuses = NUM_VUSES (vuses);
1915   nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1916   nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1917   for (i = 0; i < nvuses; i++)
1918     {
1919       tree use = VUSE_OP (vuses, i);
1920       if (TREE_CODE (use) == SSA_NAME)
1921         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1922     }
1923   for (i = 0; i < nv_may_defs; i++)
1924     {
1925       tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1926       if (TREE_CODE (def) == SSA_NAME)
1927         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1928     }
1929   for (i = 0; i < nv_must_defs; i++)
1930     {
1931       tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1932       if (TREE_CODE (def) == SSA_NAME)
1933         bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1934     }
1935
1936
1937   /** (3) Calculate the initial address the vector-pointer, and set
1938           the vector-pointer to point to it before the loop:  **/
1939
1940   /* Create: (&(base[init_val+offset]) in the loop preheader.  */
1941   new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1942                                                    offset);
1943   pe = loop_preheader_edge (loop);
1944   new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1945   gcc_assert (!new_bb);
1946   *initial_address = new_temp;
1947
1948   /* Create: p = (vectype *) initial_base  */
1949   vec_stmt = fold_convert (vect_ptr_type, new_temp);
1950   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1951   new_temp = make_ssa_name (vect_ptr, vec_stmt);
1952   TREE_OPERAND (vec_stmt, 0) = new_temp;
1953   new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1954   gcc_assert (!new_bb);
1955   vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1956
1957
1958   /** (4) Handle the updating of the vector-pointer inside the loop: **/
1959
1960   if (only_init) /* No update in loop is required.  */
1961     return vect_ptr_init;
1962
1963   idx = vect_create_index_for_vector_ref (loop, bsi);
1964
1965   /* Create: update = idx * vectype_size  */
1966   tmp = create_tmp_var (integer_type_node, "update");
1967   add_referenced_tmp_var (tmp);
1968   size = TYPE_SIZE (vect_ptr_type);
1969   type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1970   ptr_update = create_tmp_var (type, "update");
1971   add_referenced_tmp_var (ptr_update);
1972   vectype_size = build_int_cst (integer_type_node,
1973                                 GET_MODE_SIZE (TYPE_MODE (vectype)));
1974   vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1975   vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
1976   new_temp = make_ssa_name (tmp, vec_stmt);
1977   TREE_OPERAND (vec_stmt, 0) = new_temp;
1978   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1979   vec_stmt = fold_convert (type, new_temp);
1980   vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1981   new_temp = make_ssa_name (ptr_update, vec_stmt);
1982   TREE_OPERAND (vec_stmt, 0) = new_temp;
1983   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1984
1985   /* Create: data_ref_ptr = vect_ptr_init + update  */
1986   vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1987   vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1988   new_temp = make_ssa_name (vect_ptr, vec_stmt);
1989   TREE_OPERAND (vec_stmt, 0) = new_temp;
1990   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1991   data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1992
1993   return data_ref_ptr;
1994 }
1995
1996
1997 /* Function vect_create_destination_var.
1998
1999    Create a new temporary of type VECTYPE.  */
2000
2001 static tree
2002 vect_create_destination_var (tree scalar_dest, tree vectype)
2003 {
2004   tree vec_dest;
2005   const char *new_name;
2006
2007   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2008
2009   new_name = get_name (scalar_dest);
2010   if (!new_name)
2011     new_name = "var_";
2012   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2013   add_referenced_tmp_var (vec_dest);
2014
2015   return vec_dest;
2016 }
2017
2018
2019 /* Function vect_init_vector.
2020
2021    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2022    the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2023    used in the vectorization of STMT.  */
2024
2025 static tree
2026 vect_init_vector (tree stmt, tree vector_var)
2027 {
2028   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2029   struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2030   tree new_var;
2031   tree init_stmt;
2032   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2033   tree vec_oprnd;
2034   edge pe;
2035   tree new_temp;
2036   basic_block new_bb;
2037
2038   new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2039   add_referenced_tmp_var (new_var);
2040
2041   init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2042   new_temp = make_ssa_name (new_var, init_stmt);
2043   TREE_OPERAND (init_stmt, 0) = new_temp;
2044
2045   pe = loop_preheader_edge (loop);
2046   new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2047   gcc_assert (!new_bb);
2048
2049   if (vect_debug_details (NULL))
2050     {
2051       fprintf (dump_file, "created new init_stmt: ");
2052       print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2053     }
2054
2055   vec_oprnd = TREE_OPERAND (init_stmt, 0);
2056   return vec_oprnd;
2057 }
2058
2059
2060 /* Function vect_get_vec_def_for_operand.
2061
2062    OP is an operand in STMT. This function returns a (vector) def that will be
2063    used in the vectorized stmt for STMT.
2064
2065    In the case that OP is an SSA_NAME which is defined in the loop, then
2066    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2067
2068    In case OP is an invariant or constant, a new stmt that creates a vector def
2069    needs to be introduced.  */
2070
2071 static tree
2072 vect_get_vec_def_for_operand (tree op, tree stmt)
2073 {
2074   tree vec_oprnd;
2075   tree vec_stmt;
2076   tree def_stmt;
2077   stmt_vec_info def_stmt_info = NULL;
2078   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2079   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2080   int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2081   struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2082   basic_block bb;
2083   tree vec_inv;
2084   tree t = NULL_TREE;
2085   tree def;
2086   int i;
2087
2088   if (vect_debug_details (NULL))
2089     {
2090       fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2091       print_generic_expr (dump_file, op, TDF_SLIM);
2092     }
2093
2094   /** ===> Case 1: operand is a constant.  **/
2095
2096   if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2097     {
2098       /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
2099
2100       tree vec_cst;
2101
2102       /* Build a tree with vector elements.  */
2103       if (vect_debug_details (NULL))
2104         fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2105
2106       for (i = nunits - 1; i >= 0; --i)
2107         {
2108           t = tree_cons (NULL_TREE, op, t);
2109         }
2110       vec_cst = build_vector (vectype, t);
2111       return vect_init_vector (stmt, vec_cst);
2112     }
2113
2114   gcc_assert (TREE_CODE (op) == SSA_NAME);
2115
2116   /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it.  **/
2117
2118   def_stmt = SSA_NAME_DEF_STMT (op);
2119   def_stmt_info = vinfo_for_stmt (def_stmt);
2120
2121   if (vect_debug_details (NULL))
2122     {
2123       fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2124       print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2125     }
2126
2127
2128   /** ==> Case 2.1: operand is defined inside the loop.  **/
2129
2130   if (def_stmt_info)
2131     {
2132       /* Get the def from the vectorized stmt.  */
2133
2134       vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2135       gcc_assert (vec_stmt);
2136       vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2137       return vec_oprnd;
2138     }
2139
2140
2141   /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2142                     it is a reduction/induction.  **/
2143
2144   bb = bb_for_stmt (def_stmt);
2145   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2146     {
2147       if (vect_debug_details (NULL))
2148         fprintf (dump_file, "reduction/induction - unsupported.");
2149       internal_error ("no support for reduction/induction"); /* FORNOW */
2150     }
2151
2152
2153   /** ==> Case 2.3: operand is defined outside the loop -
2154                     it is a loop invariant.  */
2155
2156   switch (TREE_CODE (def_stmt))
2157     {
2158     case PHI_NODE:
2159       def = PHI_RESULT (def_stmt);
2160       break;
2161     case MODIFY_EXPR:
2162       def = TREE_OPERAND (def_stmt, 0);
2163       break;
2164     case NOP_EXPR:
2165       def = TREE_OPERAND (def_stmt, 0);
2166       gcc_assert (IS_EMPTY_STMT (def_stmt));
2167       def = op;
2168       break;
2169     default:
2170       if (vect_debug_details (NULL))
2171         {
2172           fprintf (dump_file, "unsupported defining stmt: ");
2173           print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2174         }
2175       internal_error ("unsupported defining stmt");
2176     }
2177
2178   /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}'  */
2179
2180   if (vect_debug_details (NULL))
2181     fprintf (dump_file, "Create vector_inv.");
2182
2183   for (i = nunits - 1; i >= 0; --i)
2184     {
2185       t = tree_cons (NULL_TREE, def, t);
2186     }
2187
2188   vec_inv = build_constructor (vectype, t);
2189   return vect_init_vector (stmt, vec_inv);
2190 }
2191
2192
2193 /* Function vect_finish_stmt_generation.
2194
2195    Insert a new stmt.  */
2196
2197 static void
2198 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2199 {
2200   bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2201
2202   if (vect_debug_details (NULL))
2203     {
2204       fprintf (dump_file, "add new stmt: ");
2205       print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2206     }
2207
2208   /* Make sure bsi points to the stmt that is being vectorized.  */
2209
2210   /* Assumption: any stmts created for the vectorization of stmt S were
2211      inserted before S. BSI is expected to point to S or some new stmt before S.
2212    */
2213
2214   while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2215     bsi_next (bsi);
2216   gcc_assert (stmt == bsi_stmt (*bsi));
2217 }
2218
2219
2220 /* Function vectorizable_assignment.
2221
2222    Check if STMT performs an assignment (copy) that can be vectorized.
2223    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2224    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2225    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2226
2227 static bool
2228 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2229 {
2230   tree vec_dest;
2231   tree scalar_dest;
2232   tree op;
2233   tree vec_oprnd;
2234   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2235   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2236   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2237   tree new_temp;
2238
2239   /* Is vectorizable assignment?  */
2240
2241   if (TREE_CODE (stmt) != MODIFY_EXPR)
2242     return false;
2243
2244   scalar_dest = TREE_OPERAND (stmt, 0);
2245   if (TREE_CODE (scalar_dest) != SSA_NAME)
2246     return false;
2247
2248   op = TREE_OPERAND (stmt, 1);
2249   if (!vect_is_simple_use (op, loop, NULL))
2250     {
2251       if (vect_debug_details (NULL))
2252         fprintf (dump_file, "use not simple.");
2253       return false;
2254     }
2255
2256   if (!vec_stmt) /* transformation not required.  */
2257     {
2258       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2259       return true;
2260     }
2261
2262   /** Trasform.  **/
2263   if (vect_debug_details (NULL))
2264     fprintf (dump_file, "transform assignment.");
2265
2266   /* Handle def.  */
2267   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2268
2269   /* Handle use.  */
2270   op = TREE_OPERAND (stmt, 1);
2271   vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2272
2273   /* Arguments are ready. create the new vector stmt.  */
2274   *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2275   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2276   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2277   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2278
2279   return true;
2280 }
2281
2282
2283 /* Function vectorizable_operation.
2284
2285    Check if STMT performs a binary or unary operation that can be vectorized.
2286    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2287    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2288    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2289
2290 static bool
2291 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2292 {
2293   tree vec_dest;
2294   tree scalar_dest;
2295   tree operation;
2296   tree op0, op1 = NULL;
2297   tree vec_oprnd0, vec_oprnd1=NULL;
2298   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2299   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2300   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2301   int i;
2302   enum tree_code code;
2303   enum machine_mode vec_mode;
2304   tree new_temp;
2305   int op_type;
2306   tree op;
2307   optab optab;
2308
2309   /* Is STMT a vectorizable binary/unary operation?   */
2310   if (TREE_CODE (stmt) != MODIFY_EXPR)
2311     return false;
2312
2313   if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2314     return false;
2315
2316   operation = TREE_OPERAND (stmt, 1);
2317   code = TREE_CODE (operation);
2318   optab = optab_for_tree_code (code, vectype);
2319
2320   /* Support only unary or binary operations.  */
2321   op_type = TREE_CODE_LENGTH (code);
2322   if (op_type != unary_op && op_type != binary_op)
2323     {
2324       if (vect_debug_details (NULL))
2325         fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2326       return false;
2327     }
2328
2329   for (i = 0; i < op_type; i++)
2330     {
2331       op = TREE_OPERAND (operation, i);
2332       if (!vect_is_simple_use (op, loop, NULL))
2333         {
2334           if (vect_debug_details (NULL))
2335             fprintf (dump_file, "use not simple.");
2336           return false;
2337         }
2338     }
2339
2340   /* Supportable by target?  */
2341   if (!optab)
2342     {
2343       if (vect_debug_details (NULL))
2344         fprintf (dump_file, "no optab.");
2345       return false;
2346     }
2347   vec_mode = TYPE_MODE (vectype);
2348   if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2349     {
2350       if (vect_debug_details (NULL))
2351         fprintf (dump_file, "op not supported by target.");
2352       return false;
2353     }
2354
2355   if (!vec_stmt) /* transformation not required.  */
2356     {
2357       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2358       return true;
2359     }
2360
2361   /** Transform.  **/
2362
2363   if (vect_debug_details (NULL))
2364     fprintf (dump_file, "transform binary/unary operation.");
2365
2366   /* Handle def.  */
2367   scalar_dest = TREE_OPERAND (stmt, 0);
2368   vec_dest = vect_create_destination_var (scalar_dest, vectype);
2369
2370   /* Handle uses.  */
2371   op0 = TREE_OPERAND (operation, 0);
2372   vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2373
2374   if (op_type == binary_op)
2375     {
2376       op1 = TREE_OPERAND (operation, 1);
2377       vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2378     }
2379
2380   /* Arguments are ready. create the new vector stmt.  */
2381
2382   if (op_type == binary_op)
2383     *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2384                 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2385   else
2386     *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2387                 build1 (code, vectype, vec_oprnd0));
2388   new_temp = make_ssa_name (vec_dest, *vec_stmt);
2389   TREE_OPERAND (*vec_stmt, 0) = new_temp;
2390   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2391
2392   return true;
2393 }
2394
2395
2396 /* Function vectorizable_store.
2397
2398    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2399    can be vectorized.
2400    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2401    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2402    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2403
2404 static bool
2405 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2406 {
2407   tree scalar_dest;
2408   tree data_ref;
2409   tree op;
2410   tree vec_oprnd1;
2411   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2412   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2413   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2414   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2415   enum machine_mode vec_mode;
2416   tree dummy;
2417   enum dr_alignment_support alignment_support_cheme;
2418
2419   /* Is vectorizable store? */
2420
2421   if (TREE_CODE (stmt) != MODIFY_EXPR)
2422     return false;
2423
2424   scalar_dest = TREE_OPERAND (stmt, 0);
2425   if (TREE_CODE (scalar_dest) != ARRAY_REF
2426       && TREE_CODE (scalar_dest) != INDIRECT_REF)
2427     return false;
2428
2429   op = TREE_OPERAND (stmt, 1);
2430   if (!vect_is_simple_use (op, loop, NULL))
2431     {
2432       if (vect_debug_details (NULL))
2433         fprintf (dump_file, "use not simple.");
2434       return false;
2435     }
2436
2437   vec_mode = TYPE_MODE (vectype);
2438   /* FORNOW. In some cases can vectorize even if data-type not supported
2439      (e.g. - array initialization with 0).  */
2440   if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2441     return false;
2442
2443   if (!STMT_VINFO_DATA_REF (stmt_info))
2444     return false;
2445
2446
2447   if (!vec_stmt) /* transformation not required.  */
2448     {
2449       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2450       return true;
2451     }
2452
2453   /** Trasform.  **/
2454
2455   if (vect_debug_details (NULL))
2456     fprintf (dump_file, "transform store");
2457
2458   alignment_support_cheme = vect_supportable_dr_alignment (dr);
2459   gcc_assert (alignment_support_cheme);
2460   gcc_assert (alignment_support_cheme = dr_aligned);  /* FORNOW */
2461
2462   /* Handle use - get the vectorized def from the defining stmt.  */
2463   vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2464
2465   /* Handle def.  */
2466   /* FORNOW: make sure the data reference is aligned.  */
2467   vect_align_data_ref (stmt);
2468   data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2469   data_ref = build_fold_indirect_ref (data_ref);
2470
2471   /* Arguments are ready. create the new vector stmt.  */
2472   *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2473   vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2474
2475   return true;
2476 }
2477
2478
2479 /* vectorizable_load.
2480
2481    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2482    can be vectorized.
2483    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2484    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2485    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2486
2487 static bool
2488 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2489 {
2490   tree scalar_dest;
2491   tree vec_dest = NULL;
2492   tree data_ref = NULL;
2493   tree op;
2494   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2495   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2496   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2497   tree new_temp;
2498   int mode;
2499   tree init_addr;
2500   tree new_stmt;
2501   tree dummy;
2502   basic_block new_bb;
2503   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2504   edge pe = loop_preheader_edge (loop);
2505   enum dr_alignment_support alignment_support_cheme;
2506
2507   /* Is vectorizable load? */
2508
2509   if (TREE_CODE (stmt) != MODIFY_EXPR)
2510     return false;
2511
2512   scalar_dest = TREE_OPERAND (stmt, 0);
2513   if (TREE_CODE (scalar_dest) != SSA_NAME)
2514     return false;
2515
2516   op = TREE_OPERAND (stmt, 1);
2517   if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2518     return false;
2519
2520   if (!STMT_VINFO_DATA_REF (stmt_info))
2521     return false;
2522
2523   mode = (int) TYPE_MODE (vectype);
2524
2525   /* FORNOW. In some cases can vectorize even if data-type not supported
2526     (e.g. - data copies).  */
2527   if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2528     {
2529       if (vect_debug_details (loop))
2530         fprintf (dump_file, "Aligned load, but unsupported type.");
2531       return false;
2532     }
2533
2534   if (!vec_stmt) /* transformation not required.  */
2535     {
2536       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2537       return true;
2538     }
2539
2540   /** Trasform.  **/
2541
2542   if (vect_debug_details (NULL))
2543     fprintf (dump_file, "transform load.");
2544
2545   alignment_support_cheme = vect_supportable_dr_alignment (dr);
2546   gcc_assert (alignment_support_cheme);
2547
2548   if (alignment_support_cheme == dr_aligned
2549       || alignment_support_cheme == dr_unaligned_supported)
2550     {
2551       /* Create:
2552          p = initial_addr;
2553          indx = 0;
2554          loop {
2555            vec_dest = *(p);
2556            indx = indx + 1;
2557          }
2558       */
2559
2560       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2561       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2562       if (aligned_access_p (dr))
2563         data_ref = build_fold_indirect_ref (data_ref);
2564       else
2565         {
2566           int mis = DR_MISALIGNMENT (dr);
2567           tree tmis = (mis == -1 ?
2568                        integer_zero_node :
2569                        build_int_cst (integer_type_node, mis));
2570           tmis = int_const_binop (MULT_EXPR, tmis,
2571                         build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2572           data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2573         }
2574       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2575       new_temp = make_ssa_name (vec_dest, new_stmt);
2576       TREE_OPERAND (new_stmt, 0) = new_temp;
2577       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2578     }
2579   else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2580     {
2581       /* Create:
2582          p1 = initial_addr;
2583          msq_init = *(floor(p1))
2584          p2 = initial_addr + VS - 1;
2585          magic = have_builtin ? builtin_result : initial_address;
2586          indx = 0;
2587          loop {
2588            p2' = p2 + indx * vectype_size
2589            lsq = *(floor(p2'))
2590            vec_dest = realign_load (msq, lsq, magic)
2591            indx = indx + 1;
2592            msq = lsq;
2593          }
2594       */
2595
2596       tree offset;
2597       tree magic;
2598       tree phi_stmt;
2599       tree msq_init;
2600       tree msq, lsq;
2601       tree dataref_ptr;
2602       tree params;
2603
2604       /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
2605       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2606       data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2607                                            &init_addr, true);
2608       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2609       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2610       new_temp = make_ssa_name (vec_dest, new_stmt);
2611       TREE_OPERAND (new_stmt, 0) = new_temp;
2612       new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2613       gcc_assert (!new_bb);
2614       msq_init = TREE_OPERAND (new_stmt, 0);
2615
2616
2617       /* <2> Create lsq = *(floor(p2')) in the loop  */
2618       offset = build_int_cst (integer_type_node,
2619                               GET_MODE_NUNITS (TYPE_MODE (vectype)));
2620       offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2621       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2622       dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2623       data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2624       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2625       new_temp = make_ssa_name (vec_dest, new_stmt);
2626       TREE_OPERAND (new_stmt, 0) = new_temp;
2627       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2628       lsq = TREE_OPERAND (new_stmt, 0);
2629
2630
2631       /* <3> */
2632       if (targetm.vectorize.builtin_mask_for_load)
2633         {
2634           /* Create permutation mask, if required, in loop preheader.  */
2635           tree builtin_decl;
2636           params = build_tree_list (NULL_TREE, init_addr);
2637           vec_dest = vect_create_destination_var (scalar_dest, vectype);
2638           builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2639           new_stmt = build_function_call_expr (builtin_decl, params);
2640           new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2641           new_temp = make_ssa_name (vec_dest, new_stmt);
2642           TREE_OPERAND (new_stmt, 0) = new_temp;
2643           new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2644           gcc_assert (!new_bb);
2645           magic = TREE_OPERAND (new_stmt, 0);
2646
2647           /* Since we have just created a CALL_EXPR, we may need to
2648              rename call-clobbered variables.  */
2649           mark_call_clobbered_vars_to_rename ();
2650         }
2651       else
2652         {
2653           /* Use current address instead of init_addr for reduced reg pressure.
2654            */
2655           magic = dataref_ptr;
2656         }
2657
2658
2659       /* <4> Create msq = phi <msq_init, lsq> in loop  */
2660       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2661       msq = make_ssa_name (vec_dest, NULL_TREE);
2662       phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2663       SSA_NAME_DEF_STMT (msq) = phi_stmt;
2664       add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2665       add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2666
2667
2668       /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
2669       vec_dest = vect_create_destination_var (scalar_dest, vectype);
2670       new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2671       new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2672       new_temp = make_ssa_name (vec_dest, new_stmt);
2673       TREE_OPERAND (new_stmt, 0) = new_temp;
2674       vect_finish_stmt_generation (stmt, new_stmt, bsi);
2675     }
2676   else
2677     gcc_unreachable ();
2678
2679   *vec_stmt = new_stmt;
2680   return true;
2681 }
2682
2683
2684 /* Function vect_supportable_dr_alignment
2685
2686    Return whether the data reference DR is supported with respect to its
2687    alignment.  */
2688
2689 static enum dr_alignment_support
2690 vect_supportable_dr_alignment (struct data_reference *dr)
2691 {
2692   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2693   enum machine_mode mode = (int) TYPE_MODE (vectype);
2694
2695   if (aligned_access_p (dr))
2696     return dr_aligned;
2697
2698   /* Possibly unaligned access.  */
2699
2700   if (DR_IS_READ (dr))
2701     {
2702       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2703           && (!targetm.vectorize.builtin_mask_for_load
2704               || targetm.vectorize.builtin_mask_for_load ()))
2705         return dr_unaligned_software_pipeline;
2706
2707       if (targetm.vectorize.misaligned_mem_ok (mode))
2708         /* Can't software pipeline the loads.  */
2709         return dr_unaligned_supported;
2710     }
2711
2712   /* Unsupported.  */
2713   return dr_unaligned_unsupported;
2714 }
2715
2716
2717 /* Function vect_transform_stmt.
2718
2719    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
2720
2721 static bool
2722 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2723 {
2724   bool is_store = false;
2725   tree vec_stmt = NULL_TREE;
2726   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2727   bool done;
2728
2729   switch (STMT_VINFO_TYPE (stmt_info))
2730     {
2731     case op_vec_info_type:
2732       done = vectorizable_operation (stmt, bsi, &vec_stmt);
2733       gcc_assert (done);
2734       break;
2735
2736     case assignment_vec_info_type:
2737       done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2738       gcc_assert (done);
2739       break;
2740
2741     case load_vec_info_type:
2742       done = vectorizable_load (stmt, bsi, &vec_stmt);
2743       gcc_assert (done);
2744       break;
2745
2746     case store_vec_info_type:
2747       done = vectorizable_store (stmt, bsi, &vec_stmt);
2748       gcc_assert (done);
2749       is_store = true;
2750       break;
2751     default:
2752       if (vect_debug_details (NULL))
2753         fprintf (dump_file, "stmt not supported.");
2754       gcc_unreachable ();
2755     }
2756
2757   STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2758
2759   return is_store;
2760 }
2761
2762
2763 /* This function builds ni_name = number of iterations loop executes
2764    on the loop preheader.  */
2765
2766 static tree
2767 vect_build_loop_niters (loop_vec_info loop_vinfo)
2768 {
2769   tree ni_name, stmt, var;
2770   edge pe;
2771   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2772   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2773
2774   var = create_tmp_var (TREE_TYPE (ni), "niters");
2775   add_referenced_tmp_var (var);
2776   ni_name = force_gimple_operand (ni, &stmt, false, var);
2777
2778   pe = loop_preheader_edge (loop);
2779   if (stmt)
2780     {
2781       basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2782       gcc_assert (!new_bb);
2783     }
2784
2785   return ni_name;
2786 }
2787
2788
2789 /* This function generates the following statements:
2790
2791  ni_name = number of iterations loop executes
2792  ratio = ni_name / vf
2793  ratio_mult_vf_name = ratio * vf
2794
2795  and places them at the loop preheader edge.  */
2796
2797 static void
2798 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2799                                  tree *ni_name_ptr,
2800                                  tree *ratio_mult_vf_name_ptr,
2801                                  tree *ratio_name_ptr)
2802 {
2803
2804   edge pe;
2805   basic_block new_bb;
2806   tree stmt, ni_name;
2807   tree var;
2808   tree ratio_name;
2809   tree ratio_mult_vf_name;
2810   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2811   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2812   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2813   tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
2814
2815   pe = loop_preheader_edge (loop);
2816
2817   /* Generate temporary variable that contains
2818      number of iterations loop executes.  */
2819
2820   ni_name = vect_build_loop_niters (loop_vinfo);
2821
2822   /* Create: ratio = ni >> log2(vf) */
2823
2824   var = create_tmp_var (TREE_TYPE (ni), "bnd");
2825   add_referenced_tmp_var (var);
2826   ratio_name = make_ssa_name (var, NULL_TREE);
2827   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2828            build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2829   SSA_NAME_DEF_STMT (ratio_name) = stmt;
2830
2831   pe = loop_preheader_edge (loop);
2832   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2833   gcc_assert (!new_bb);
2834
2835   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
2836
2837   var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2838   add_referenced_tmp_var (var);
2839   ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2840   stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2841            build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2842   SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2843
2844   pe = loop_preheader_edge (loop);
2845   new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2846   gcc_assert (!new_bb);
2847
2848   *ni_name_ptr = ni_name;
2849   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2850   *ratio_name_ptr = ratio_name;
2851
2852   return;
2853 }
2854
2855
2856 /*   Function vect_update_ivs_after_vectorizer.
2857
2858      "Advance" the induction variables of LOOP to the value they should take
2859      after the execution of LOOP.  This is currently necessary because the
2860      vectorizer does not handle induction variables that are used after the
2861      loop.  Such a situation occurs when the last iterations of LOOP are
2862      peeled, because:
2863      1. We introduced new uses after LOOP for IVs that were not originally used
2864         after LOOP: the IVs of LOOP are now used by an epilog loop.
2865      2. LOOP is going to be vectorized; this means that it will iterate N/VF
2866         times, whereas the loop IVs should be bumped N times.
2867
2868      Input:
2869      - LOOP - a loop that is going to be vectorized. The last few iterations
2870               of LOOP were peeled.
2871      - NITERS - the number of iterations that LOOP executes (before it is
2872                 vectorized). i.e, the number of times the ivs should be bumped.
2873      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2874                   coming out from LOOP on which there are uses of the LOOP ivs
2875                   (this is the path from LOOP->exit to epilog_loop->preheader).
2876
2877                   The new definitions of the ivs are placed in LOOP->exit.
2878                   The phi args associated with the edge UPDATE_E in the bb
2879                   UPDATE_E->dest are updated accordingly.
2880
2881      Assumption 1: Like the rest of the vectorizer, this function assumes
2882      a single loop exit that has a single predecessor.
2883
2884      Assumption 2: The phi nodes in the LOOP header and in update_bb are
2885      organized in the same order.
2886
2887      Assumption 3: The access function of the ivs is simple enough (see
2888      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
2889
2890      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2891      coming out of LOOP on which the ivs of LOOP are used (this is the path
2892      that leads to the epilog loop; other paths skip the epilog loop).  This
2893      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2894      needs to have its phis updated.
2895  */
2896
2897 static void
2898 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
2899 {
2900   basic_block exit_bb = loop->exit_edges[0]->dest;
2901   tree phi, phi1;
2902   basic_block update_bb = update_e->dest;
2903
2904   /* gcc_assert (vect_can_advance_ivs_p (loop)); */
2905
2906   /* Make sure there exists a single-predecessor exit bb:  */
2907   gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
2908
2909   for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2910        phi && phi1;
2911        phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2912     {
2913       tree access_fn = NULL;
2914       tree evolution_part;
2915       tree init_expr;
2916       tree step_expr;
2917       tree var, stmt, ni, ni_name;
2918       block_stmt_iterator last_bsi;
2919
2920       /* Skip virtual phi's.  */
2921       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2922         {
2923           if (vect_debug_details (NULL))
2924             fprintf (dump_file, "virtual phi. skip.");
2925           continue;
2926         }
2927
2928       access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2929       gcc_assert (access_fn);
2930       evolution_part =
2931          unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2932       gcc_assert (evolution_part != NULL_TREE);
2933
2934       /* FORNOW: We do not support IVs whose evolution function is a polynomial
2935          of degree >= 2 or exponential.  */
2936       gcc_assert (!tree_is_chrec (evolution_part));
2937
2938       step_expr = evolution_part;
2939       init_expr = unshare_expr (initial_condition (access_fn));
2940
2941       ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2942                   build2 (MULT_EXPR, TREE_TYPE (niters),
2943                        niters, step_expr), init_expr);
2944
2945       var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2946       add_referenced_tmp_var (var);
2947
2948       ni_name = force_gimple_operand (ni, &stmt, false, var);
2949
2950       /* Insert stmt into exit_bb.  */
2951       last_bsi = bsi_last (exit_bb);
2952       if (stmt)
2953         bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2954
2955       /* Fix phi expressions in the successor bb.  */
2956       gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
2957                   PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
2958       SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
2959     }
2960 }
2961
2962
2963 /* Function vect_do_peeling_for_loop_bound
2964
2965    Peel the last iterations of the loop represented by LOOP_VINFO.
2966    The peeled iterations form a new epilog loop.  Given that the loop now
2967    iterates NITERS times, the new epilog loop iterates
2968    NITERS % VECTORIZATION_FACTOR times.
2969
2970    The original loop will later be made to iterate
2971    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
2972
2973 static void
2974 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2975                                 struct loops *loops)
2976 {
2977
2978   tree ni_name, ratio_mult_vf_name;
2979   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2980   struct loop *new_loop;
2981   edge update_e;
2982 #ifdef ENABLE_CHECKING
2983   int loop_num;
2984 #endif
2985
2986   if (vect_debug_details (NULL))
2987     fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
2988
2989   /* Generate the following variables on the preheader of original loop:
2990
2991      ni_name = number of iteration the original loop executes
2992      ratio = ni_name / vf
2993      ratio_mult_vf_name = ratio * vf  */
2994   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2995                                    &ratio_mult_vf_name, ratio);
2996
2997   /* Update loop info.  */
2998   loop->pre_header = loop_preheader_edge (loop)->src;
2999   loop->pre_header_edges[0] = loop_preheader_edge (loop);
3000
3001 #ifdef ENABLE_CHECKING
3002   loop_num  = loop->num;
3003 #endif
3004   new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3005                                             ratio_mult_vf_name, ni_name, false);
3006 #ifdef ENABLE_CHECKING
3007   gcc_assert (new_loop);
3008   gcc_assert (loop_num == loop->num);
3009   slpeel_verify_cfg_after_peeling (loop, new_loop);
3010 #endif
3011
3012   /* A guard that controls whether the new_loop is to be executed or skipped
3013      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
3014      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
3015      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
3016      is on the path where the LOOP IVs are used and need to be updated.  */
3017
3018   if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3019     update_e = EDGE_PRED (new_loop->pre_header, 0);
3020   else
3021     update_e = EDGE_PRED (new_loop->pre_header, 1);
3022
3023   /* Update IVs of original loop as if they were advanced
3024      by ratio_mult_vf_name steps.  */
3025   vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3026
3027   /* After peeling we have to reset scalar evolution analyzer.  */
3028   scev_reset ();
3029
3030   return;
3031 }
3032
3033
3034 /* Function vect_gen_niters_for_prolog_loop
3035
3036    Set the number of iterations for the loop represented by LOOP_VINFO
3037    to the minimum between LOOP_NITERS (the original iteration count of the loop)
3038    and the misalignment of DR - the first data reference recorded in
3039    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
3040    this loop, the data reference DR will refer to an aligned location.
3041
3042    The following computation is generated:
3043
3044    compute address misalignment in bytes:
3045    addr_mis = addr & (vectype_size - 1)
3046
3047    prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3048
3049    (elem_size = element type size; an element is the scalar element
3050         whose type is the inner type of the vectype)  */
3051
3052 static tree
3053 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3054 {
3055   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3056   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3057   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3058   tree var, stmt;
3059   tree iters, iters_name;
3060   edge pe;
3061   basic_block new_bb;
3062   tree dr_stmt = DR_STMT (dr);
3063   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3064   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3065   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3066   tree elem_misalign;
3067   tree byte_misalign;
3068   tree new_stmts = NULL_TREE;
3069   tree start_addr =
3070         vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3071   tree ptr_type = TREE_TYPE (start_addr);
3072   tree size = TYPE_SIZE (ptr_type);
3073   tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
3074   tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3075   tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3076   tree niters_type = TREE_TYPE (loop_niters);
3077   tree elem_size_log =
3078         build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3079   tree vf_tree = build_int_cst (unsigned_type_node, vf);
3080
3081   pe = loop_preheader_edge (loop);
3082   new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3083   gcc_assert (!new_bb);
3084
3085   /* Create:  byte_misalign = addr & (vectype_size - 1)  */
3086   byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3087
3088   /* Create:  elem_misalign = byte_misalign / element_size  */
3089   elem_misalign =
3090         build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3091
3092   /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
3093   iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3094   iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3095   iters = fold_convert (niters_type, iters);
3096
3097   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
3098   /* If the loop bound is known at compile time we already verified that it is
3099      greater than vf; since the misalignment ('iters') is at most vf, there's
3100      no need to generate the MIN_EXPR in this case.  */
3101   if (!host_integerp (loop_niters, 0))
3102     iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3103
3104   var = create_tmp_var (niters_type, "prolog_loop_niters");
3105   add_referenced_tmp_var (var);
3106   iters_name = force_gimple_operand (iters, &stmt, false, var);
3107
3108   /* Insert stmt on loop preheader edge.  */
3109   pe = loop_preheader_edge (loop);
3110   if (stmt)
3111     {
3112       basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3113       gcc_assert (!new_bb);
3114     }
3115
3116   return iters_name;
3117 }
3118
3119
3120 /* Function vect_update_inits_of_dr
3121
3122    NITERS iterations were peeled from LOOP.  DR represents a data reference
3123    in LOOP.  This function updates the information recorded in DR to
3124    account for the fact that the first NITERS iterations had already been
3125    executed.  Specifically, it updates the initial_condition of the
3126    access_function of DR.  */
3127
3128 static void
3129 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3130                          tree niters)
3131 {
3132   tree access_fn = DR_ACCESS_FN (dr, 0);
3133   tree init, init_new, step;
3134
3135   step = evolution_part_in_loop_num (access_fn, loop->num);
3136   init = initial_condition (access_fn);
3137
3138   init_new = build2 (PLUS_EXPR, TREE_TYPE (init),
3139                   build2 (MULT_EXPR, TREE_TYPE (niters),
3140                          niters, step), init);
3141   DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3142
3143   return;
3144 }
3145
3146
3147 /* Function vect_update_inits_of_drs
3148
3149    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3150    This function updates the information recorded for the data references in
3151    the loop to account for the fact that the first NITERS iterations had
3152    already been executed.  Specifically, it updates the initial_condition of the
3153    access_function of all the data_references in the loop.  */
3154
3155 static void
3156 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3157 {
3158   unsigned int i;
3159   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3160   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3161   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3162
3163   if (dump_file && (dump_flags & TDF_DETAILS))
3164     fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3165
3166   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3167     {
3168       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3169       vect_update_inits_of_dr (dr, loop, niters);
3170     }
3171
3172   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3173     {
3174       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3175       vect_update_inits_of_dr (dr, loop, niters);
3176     }
3177 }
3178
3179
3180 /* Function vect_do_peeling_for_alignment
3181
3182    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3183    'niters' is set to the misalignment of one of the data references in the
3184    loop, thereby forcing it to refer to an aligned location at the beginning
3185    of the execution of this loop.  The data reference for which we are
3186    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
3187
3188 static void
3189 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3190 {
3191   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3192   tree niters_of_prolog_loop, ni_name;
3193   tree n_iters;
3194   struct loop *new_loop;
3195
3196   if (vect_debug_details (NULL))
3197     fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3198
3199   ni_name = vect_build_loop_niters (loop_vinfo);
3200   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3201
3202   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
3203   new_loop =
3204         slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3205                                        niters_of_prolog_loop, ni_name, true);
3206 #ifdef ENABLE_CHECKING
3207   gcc_assert (new_loop);
3208   slpeel_verify_cfg_after_peeling (new_loop, loop);
3209 #endif
3210
3211   /* Update number of times loop executes.  */
3212   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3213   LOOP_VINFO_NITERS (loop_vinfo) =
3214     build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3215
3216   /* Update the init conditions of the access functions of all data refs.  */
3217   vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3218
3219   /* After peeling we have to reset scalar evolution analyzer.  */
3220   scev_reset ();
3221
3222   return;
3223 }
3224
3225
3226 /* Function vect_transform_loop.
3227
3228    The analysis phase has determined that the loop is vectorizable.
3229    Vectorize the loop - created vectorized stmts to replace the scalar
3230    stmts in the loop, and update the loop exit condition.  */
3231
3232 static void
3233 vect_transform_loop (loop_vec_info loop_vinfo,
3234                      struct loops *loops ATTRIBUTE_UNUSED)
3235 {
3236   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3237   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3238   int nbbs = loop->num_nodes;
3239   block_stmt_iterator si;
3240   int i;
3241   tree ratio = NULL;
3242   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3243
3244   if (vect_debug_details (NULL))
3245     fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3246
3247
3248   /* Peel the loop if there are data refs with unknown alignment.
3249      Only one data ref with unknown store is allowed.  */
3250
3251   if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3252     vect_do_peeling_for_alignment (loop_vinfo, loops);
3253
3254   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3255      compile time constant), or it is a constant that doesn't divide by the
3256      vectorization factor, then an epilog loop needs to be created.
3257      We therefore duplicate the loop: the original loop will be vectorized,
3258      and will compute the first (n/VF) iterations. The second copy of the loop
3259      will remain scalar and will compute the remaining (n%VF) iterations.
3260      (VF is the vectorization factor).  */
3261
3262   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3263       || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3264           && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3265     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3266   else
3267     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3268                 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3269
3270   /* 1) Make sure the loop header has exactly two entries
3271      2) Make sure we have a preheader basic block.  */
3272
3273   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3274
3275   loop_split_edge_with (loop_preheader_edge (loop), NULL);
3276
3277
3278   /* FORNOW: the vectorizer supports only loops which body consist
3279      of one basic block (header + empty latch). When the vectorizer will
3280      support more involved loop forms, the order by which the BBs are
3281      traversed need to be reconsidered.  */
3282
3283   for (i = 0; i < nbbs; i++)
3284     {
3285       basic_block bb = bbs[i];
3286
3287       for (si = bsi_start (bb); !bsi_end_p (si);)
3288         {
3289           tree stmt = bsi_stmt (si);
3290           stmt_vec_info stmt_info;
3291           bool is_store;
3292
3293           if (vect_debug_details (NULL))
3294             {
3295               fprintf (dump_file, "------>vectorizing statement: ");
3296               print_generic_expr (dump_file, stmt, TDF_SLIM);
3297             }
3298           stmt_info = vinfo_for_stmt (stmt);
3299           gcc_assert (stmt_info);
3300           if (!STMT_VINFO_RELEVANT_P (stmt_info))
3301             {
3302               bsi_next (&si);
3303               continue;
3304             }
3305 #ifdef ENABLE_CHECKING
3306           /* FORNOW: Verify that all stmts operate on the same number of
3307                      units and no inner unrolling is necessary.  */
3308           gcc_assert
3309                 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3310                  == vectorization_factor);
3311 #endif
3312           /* -------- vectorize statement ------------ */
3313           if (vect_debug_details (NULL))
3314             fprintf (dump_file, "transform statement.");
3315
3316           is_store = vect_transform_stmt (stmt, &si);
3317           if (is_store)
3318             {
3319               /* free the attached stmt_vec_info and remove the stmt.  */
3320               stmt_ann_t ann = stmt_ann (stmt);
3321               free (stmt_info);
3322               set_stmt_info (ann, NULL);
3323               bsi_remove (&si);
3324               continue;
3325             }
3326
3327           bsi_next (&si);
3328         }                       /* stmts in BB */
3329     }                           /* BBs in loop */
3330
3331   slpeel_make_loop_iterate_ntimes (loop, ratio);
3332
3333   if (vect_debug_details (loop))
3334     fprintf (dump_file,"Success! loop vectorized.");
3335   if (vect_debug_stats (loop))
3336     fprintf (dump_file, "LOOP VECTORIZED.");
3337 }
3338
3339
3340 /* Function vect_is_simple_use.
3341
3342    Input:
3343    LOOP - the loop that is being vectorized.
3344    OPERAND - operand of a stmt in LOOP.
3345    DEF - the defining stmt in case OPERAND is an SSA_NAME.
3346
3347    Returns whether a stmt with OPERAND can be vectorized.
3348    Supportable operands are constants, loop invariants, and operands that are
3349    defined by the current iteration of the loop. Unsupportable operands are
3350    those that are defined by a previous iteration of the loop (as is the case
3351    in reduction/induction computations).  */
3352
3353 static bool
3354 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3355 {
3356   tree def_stmt;
3357   basic_block bb;
3358
3359   if (def)
3360     *def = NULL_TREE;
3361
3362   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3363     return true;
3364
3365   if (TREE_CODE (operand) != SSA_NAME)
3366     return false;
3367
3368   def_stmt = SSA_NAME_DEF_STMT (operand);
3369   if (def_stmt == NULL_TREE )
3370     {
3371       if (vect_debug_details (NULL))
3372         fprintf (dump_file, "no def_stmt.");
3373       return false;
3374     }
3375
3376   /* empty stmt is expected only in case of a function argument.
3377      (Otherwise - we expect a phi_node or a modify_expr).  */
3378   if (IS_EMPTY_STMT (def_stmt))
3379     {
3380       tree arg = TREE_OPERAND (def_stmt, 0);
3381       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3382         return true;
3383       if (vect_debug_details (NULL))
3384         {
3385           fprintf (dump_file, "Unexpected empty stmt: ");
3386           print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3387         }
3388       return false;
3389     }
3390
3391   /* phi_node inside the loop indicates an induction/reduction pattern.
3392      This is not supported yet.  */
3393   bb = bb_for_stmt (def_stmt);
3394   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3395     {
3396       if (vect_debug_details (NULL))
3397         fprintf (dump_file, "reduction/induction - unsupported.");
3398       return false; /* FORNOW: not supported yet.  */
3399     }
3400
3401   /* Expecting a modify_expr or a phi_node.  */
3402   if (TREE_CODE (def_stmt) == MODIFY_EXPR
3403       || TREE_CODE (def_stmt) == PHI_NODE)
3404     {
3405       if (def)
3406         *def = def_stmt;
3407       return true;
3408     }
3409
3410   return false;
3411 }
3412
3413
3414 /* Function vect_analyze_operations.
3415
3416    Scan the loop stmts and make sure they are all vectorizable.  */
3417
3418 static bool
3419 vect_analyze_operations (loop_vec_info loop_vinfo)
3420 {
3421   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3422   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3423   int nbbs = loop->num_nodes;
3424   block_stmt_iterator si;
3425   unsigned int vectorization_factor = 0;
3426   int i;
3427   bool ok;
3428   tree scalar_type;
3429
3430   if (vect_debug_details (NULL))
3431     fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3432
3433   for (i = 0; i < nbbs; i++)
3434     {
3435       basic_block bb = bbs[i];
3436
3437       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3438         {
3439           tree stmt = bsi_stmt (si);
3440           unsigned int nunits;
3441           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3442           tree vectype;
3443
3444           if (vect_debug_details (NULL))
3445             {
3446               fprintf (dump_file, "==> examining statement: ");
3447               print_generic_expr (dump_file, stmt, TDF_SLIM);
3448             }
3449
3450           gcc_assert (stmt_info);
3451
3452           /* skip stmts which do not need to be vectorized.
3453              this is expected to include:
3454              - the COND_EXPR which is the loop exit condition
3455              - any LABEL_EXPRs in the loop
3456              - computations that are used only for array indexing or loop
3457              control  */
3458
3459           if (!STMT_VINFO_RELEVANT_P (stmt_info))
3460             {
3461               if (vect_debug_details (NULL))
3462                 fprintf (dump_file, "irrelevant.");
3463               continue;
3464             }
3465
3466           if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3467             {
3468               if (vect_debug_stats (loop) || vect_debug_details (loop))
3469                 {
3470                   fprintf (dump_file, "not vectorized: vector stmt in loop:");
3471                   print_generic_expr (dump_file, stmt, TDF_SLIM);
3472                 }
3473               return false;
3474             }
3475
3476           if (STMT_VINFO_DATA_REF (stmt_info))
3477             scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3478           else if (TREE_CODE (stmt) == MODIFY_EXPR)
3479             scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3480           else
3481             scalar_type = TREE_TYPE (stmt);
3482
3483           if (vect_debug_details (NULL))
3484             {
3485               fprintf (dump_file, "get vectype for scalar type:  ");
3486               print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3487             }
3488
3489           vectype = get_vectype_for_scalar_type (scalar_type);
3490           if (!vectype)
3491             {
3492               if (vect_debug_stats (loop) || vect_debug_details (loop))
3493                 {
3494                   fprintf (dump_file, "not vectorized: unsupported data-type ");
3495                   print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3496                 }
3497               return false;
3498             }
3499
3500           if (vect_debug_details (NULL))
3501             {
3502               fprintf (dump_file, "vectype: ");
3503               print_generic_expr (dump_file, vectype, TDF_SLIM);
3504             }
3505           STMT_VINFO_VECTYPE (stmt_info) = vectype;
3506
3507           ok = (vectorizable_operation (stmt, NULL, NULL)
3508                 || vectorizable_assignment (stmt, NULL, NULL)
3509                 || vectorizable_load (stmt, NULL, NULL)
3510                 || vectorizable_store (stmt, NULL, NULL));
3511
3512           if (!ok)
3513             {
3514               if (vect_debug_stats (loop) || vect_debug_details (loop))
3515                 {
3516                   fprintf (dump_file, "not vectorized: stmt not supported: ");
3517                   print_generic_expr (dump_file, stmt, TDF_SLIM);
3518                 }
3519               return false;
3520             }
3521
3522           nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3523           if (vect_debug_details (NULL))
3524             fprintf (dump_file, "nunits = %d", nunits);
3525
3526           if (vectorization_factor)
3527             {
3528               /* FORNOW: don't allow mixed units.
3529                  This restriction will be relaxed in the future.  */
3530               if (nunits != vectorization_factor)
3531                 {
3532                   if (vect_debug_stats (loop) || vect_debug_details (loop))
3533                     fprintf (dump_file, "not vectorized: mixed data-types");
3534                   return false;
3535                 }
3536             }
3537           else
3538             vectorization_factor = nunits;
3539
3540 #ifdef ENABLE_CHECKING
3541           gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3542                         * vectorization_factor == UNITS_PER_SIMD_WORD);
3543 #endif
3544         }
3545     }
3546
3547   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
3548
3549   if (vectorization_factor <= 1)
3550     {
3551       if (vect_debug_stats (loop) || vect_debug_details (loop))
3552         fprintf (dump_file, "not vectorized: unsupported data-type");
3553       return false;
3554     }
3555   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3556
3557   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3558     fprintf (dump_file,
3559         "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3560         vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3561
3562   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3563       && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3564     {
3565       if (vect_debug_stats (loop) || vect_debug_details (loop))
3566         fprintf (dump_file, "not vectorized: iteration count too small.");
3567       return false;
3568     }
3569
3570   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3571       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3572     {
3573       if (vect_debug_stats (loop) || vect_debug_details (loop))
3574         fprintf (dump_file, "epilog loop required.");
3575       if (!vect_can_advance_ivs_p (loop))
3576         {
3577           if (vect_debug_stats (loop) || vect_debug_details (loop))
3578             fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3579           return false;
3580         }
3581       if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3582         {
3583           if (vect_debug_stats (loop) || vect_debug_details (loop))
3584             fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3585           return false;
3586         }
3587     }
3588
3589   return true;
3590 }
3591
3592
3593 /* Function exist_non_indexing_operands_for_use_p
3594
3595    USE is one of the uses attached to STMT. Check if USE is
3596    used in STMT for anything other than indexing an array.  */
3597
3598 static bool
3599 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3600 {
3601   tree operand;
3602   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3603
3604   /* USE corresponds to some operand in STMT. If there is no data
3605      reference in STMT, then any operand that corresponds to USE
3606      is not indexing an array.  */
3607   if (!STMT_VINFO_DATA_REF (stmt_info))
3608     return true;
3609
3610   /* STMT has a data_ref. FORNOW this means that its of one of
3611      the following forms:
3612      -1- ARRAY_REF = var
3613      -2- var = ARRAY_REF
3614      (This should have been verified in analyze_data_refs).
3615
3616      'var' in the second case corresponds to a def, not a use,
3617      so USE cannot correspond to any operands that are not used
3618      for array indexing.
3619
3620      Therefore, all we need to check is if STMT falls into the
3621      first case, and whether var corresponds to USE.  */
3622
3623   if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3624     return false;
3625
3626   operand = TREE_OPERAND (stmt, 1);
3627
3628   if (TREE_CODE (operand) != SSA_NAME)
3629     return false;
3630
3631   if (operand == use)
3632     return true;
3633
3634   return false;
3635 }
3636
3637
3638 /* Function vect_is_simple_iv_evolution.
3639
3640    FORNOW: A simple evolution of an induction variables in the loop is
3641    considered a polynomial evolution with constant step.  */
3642
3643 static bool
3644 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3645                                 tree * step, bool strict)
3646 {
3647   tree init_expr;
3648   tree step_expr;
3649
3650   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3651
3652   /* When there is no evolution in this loop, the evolution function
3653      is not "simple".  */
3654   if (evolution_part == NULL_TREE)
3655     return false;
3656
3657   /* When the evolution is a polynomial of degree >= 2
3658      the evolution function is not "simple".  */
3659   if (tree_is_chrec (evolution_part))
3660     return false;
3661
3662   step_expr = evolution_part;
3663   init_expr = unshare_expr (initial_condition (access_fn));
3664
3665   if (vect_debug_details (NULL))
3666     {
3667       fprintf (dump_file, "step: ");
3668       print_generic_expr (dump_file, step_expr, TDF_SLIM);
3669       fprintf (dump_file, ",  init: ");
3670       print_generic_expr (dump_file, init_expr, TDF_SLIM);
3671     }
3672
3673   *init = init_expr;
3674   *step = step_expr;
3675
3676   if (TREE_CODE (step_expr) != INTEGER_CST)
3677     {
3678       if (vect_debug_details (NULL))
3679         fprintf (dump_file, "step unknown.");
3680       return false;
3681     }
3682
3683   if (strict)
3684     if (!integer_onep (step_expr))
3685       {
3686         if (vect_debug_details (NULL))
3687           print_generic_expr (dump_file, step_expr, TDF_SLIM);
3688         return false;
3689       }
3690
3691   return true;
3692 }
3693
3694
3695 /* Function vect_analyze_scalar_cycles.
3696
3697    Examine the cross iteration def-use cycles of scalar variables, by
3698    analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3699    cycles that they represent do not impede vectorization.
3700
3701    FORNOW: Reduction as in the following loop, is not supported yet:
3702               loop1:
3703               for (i=0; i<N; i++)
3704                  sum += a[i];
3705            The cross-iteration cycle corresponding to variable 'sum' will be
3706            considered too complicated and will impede vectorization.
3707
3708    FORNOW: Induction as in the following loop, is not supported yet:
3709               loop2:
3710               for (i=0; i<N; i++)
3711                  a[i] = i;
3712
3713            However, the following loop *is* vectorizable:
3714               loop3:
3715               for (i=0; i<N; i++)
3716                  a[i] = b[i];
3717
3718            In both loops there exists a def-use cycle for the variable i:
3719               loop: i_2 = PHI (i_0, i_1)
3720                     a[i_2] = ...;
3721                     i_1 = i_2 + 1;
3722                     GOTO loop;
3723
3724            The evolution of the above cycle is considered simple enough,
3725            however, we also check that the cycle does not need to be
3726            vectorized, i.e - we check that the variable that this cycle
3727            defines is only used for array indexing or in stmts that do not
3728            need to be vectorized. This is not the case in loop2, but it
3729            *is* the case in loop3.  */
3730
3731 static bool
3732 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3733 {
3734   tree phi;
3735   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3736   basic_block bb = loop->header;
3737   tree dummy;
3738
3739   if (vect_debug_details (NULL))
3740     fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3741
3742   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3743     {
3744       tree access_fn = NULL;
3745
3746       if (vect_debug_details (NULL))
3747         {
3748           fprintf (dump_file, "Analyze phi: ");
3749           print_generic_expr (dump_file, phi, TDF_SLIM);
3750         }
3751
3752       /* Skip virtual phi's. The data dependences that are associated with
3753          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
3754
3755       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3756         {
3757           if (vect_debug_details (NULL))
3758             fprintf (dump_file, "virtual phi. skip.");
3759           continue;
3760         }
3761
3762       /* Analyze the evolution function.  */
3763
3764       /* FORNOW: The only scalar cross-iteration cycles that we allow are
3765          those of loop induction variables; This property is verified here.
3766
3767          Furthermore, if that induction variable is used in an operation
3768          that needs to be vectorized (i.e, is not solely used to index
3769          arrays and check the exit condition) - we do not support its
3770          vectorization yet. This property is verified in vect_is_simple_use,
3771          during vect_analyze_operations.  */
3772
3773       access_fn = /* instantiate_parameters
3774                      (loop,*/
3775          analyze_scalar_evolution (loop, PHI_RESULT (phi));
3776
3777       if (!access_fn)
3778         {
3779           if (vect_debug_stats (loop) || vect_debug_details (loop))
3780             fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3781           return false;
3782         }
3783
3784       if (vect_debug_details (NULL))
3785         {
3786            fprintf (dump_file, "Access function of PHI: ");
3787            print_generic_expr (dump_file, access_fn, TDF_SLIM);
3788         }
3789
3790       if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3791                                         &dummy, false))
3792         {
3793           if (vect_debug_stats (loop) || vect_debug_details (loop))
3794             fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3795           return false;
3796         }
3797     }
3798
3799   return true;
3800 }
3801
3802
3803 /* Function vect_analyze_data_ref_dependence.
3804
3805    Return TRUE if there (might) exist a dependence between a memory-reference
3806    DRA and a memory-reference DRB.  */
3807
3808 static bool
3809 vect_analyze_data_ref_dependence (struct data_reference *dra,
3810                                   struct data_reference *drb,
3811                                   struct loop *loop)
3812 {
3813   bool differ_p;
3814   struct data_dependence_relation *ddr;
3815
3816   if (!array_base_name_differ_p (dra, drb, &differ_p))
3817     {
3818       if (vect_debug_stats (loop) || vect_debug_details (loop))
3819         {
3820           fprintf (dump_file,
3821                 "not vectorized: can't determine dependence between: ");
3822           print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3823           fprintf (dump_file, " and ");
3824           print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3825         }
3826       return true;
3827     }
3828
3829   if (differ_p)
3830     return false;
3831
3832   ddr = initialize_data_dependence_relation (dra, drb);
3833   compute_affine_dependence (ddr);
3834
3835   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3836     return false;
3837
3838   if (vect_debug_stats (loop) || vect_debug_details (loop))
3839     {
3840       fprintf (dump_file,
3841         "not vectorized: possible dependence between data-refs ");
3842       print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3843       fprintf (dump_file, " and ");
3844       print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3845     }
3846
3847   return true;
3848 }
3849
3850
3851 /* Function vect_analyze_data_ref_dependences.
3852
3853    Examine all the data references in the loop, and make sure there do not
3854    exist any data dependences between them.
3855
3856    TODO: dependences which distance is greater than the vectorization factor
3857          can be ignored.  */
3858
3859 static bool
3860 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3861 {
3862   unsigned int i, j;
3863   varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3864   varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3865   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3866
3867   /* Examine store-store (output) dependences.  */
3868
3869   if (vect_debug_details (NULL))
3870     fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3871
3872   if (vect_debug_details (NULL))
3873     fprintf (dump_file, "compare all store-store pairs.");
3874
3875   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3876     {
3877       for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3878         {
3879           struct data_reference *dra =
3880             VARRAY_GENERIC_PTR (loop_write_refs, i);
3881           struct data_reference *drb =
3882             VARRAY_GENERIC_PTR (loop_write_refs, j);
3883           if (vect_analyze_data_ref_dependence (dra, drb, loop))
3884             return false;
3885         }
3886     }
3887
3888   /* Examine load-store (true/anti) dependences.  */
3889
3890   if (vect_debug_details (NULL))
3891     fprintf (dump_file, "compare all load-store pairs.");
3892
3893   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3894     {
3895       for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3896         {
3897           struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3898           struct data_reference *drb =
3899             VARRAY_GENERIC_PTR (loop_write_refs, j);
3900           if (vect_analyze_data_ref_dependence (dra, drb, loop))
3901             return false;
3902         }
3903     }
3904
3905   return true;
3906 }
3907
3908
3909 /* Function vect_get_first_index.
3910
3911    REF is a data reference.
3912    If it is an ARRAY_REF: if its lower bound is simple enough,
3913    put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3914    If it is not an ARRAY_REF: REF has no "first index";
3915    ARRAY_FIRST_INDEX in zero, and the function returns TRUE.  */
3916
3917 static bool
3918 vect_get_first_index (tree ref, tree *array_first_index)
3919 {
3920   tree array_start;
3921
3922   if (TREE_CODE (ref) != ARRAY_REF)
3923     *array_first_index = size_zero_node;
3924   else
3925     {
3926       array_start = array_ref_low_bound (ref);
3927       if (!host_integerp (array_start, 0))
3928         {
3929           if (vect_debug_details (NULL))
3930             {
3931               fprintf (dump_file, "array min val not simple integer cst.");
3932               print_generic_expr (dump_file, array_start, TDF_DETAILS);
3933             }
3934           return false;
3935         }
3936       *array_first_index = array_start;
3937     }
3938
3939   return true;
3940 }
3941
3942
3943 /* Function vect_compute_array_base_alignment.
3944    A utility function of vect_compute_array_ref_alignment.
3945
3946    Compute the misalignment of ARRAY in bits.
3947
3948    Input:
3949    ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3950    VECTYPE - we are interested in the misalignment modulo the size of vectype.
3951              if NULL: don't compute misalignment, just return the base of ARRAY.
3952    PREV_DIMENSIONS - initialized to one.
3953    MISALIGNMENT - the computed misalignment in bits.
3954
3955    Output:
3956    If VECTYPE is not NULL:
3957      Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3958      the base of the array, and put the computed misalignment in MISALIGNMENT.
3959    If VECTYPE is NULL:
3960      Return the base of the array.
3961
3962    For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3963    a[idx_N]...[idx_2][idx_1] is
3964    {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3965     ... + idx_N * dim_0 * ... * dim_N-1}.
3966    (The misalignment of &a is not checked here).
3967    Note, that every term contains dim_0, therefore, if dim_0 is a
3968    multiple of NUNITS, the whole sum is a multiple of NUNITS.
3969    Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3970    NUINTS, we can say that the misalignment of the sum is equal to
3971    the misalignment of {idx_1 * dim_0}.  If idx_1 is not constant,
3972    we can't determine this array misalignment, and we return
3973    false.
3974    We proceed recursively in this manner, accumulating total misalignment
3975    and the multiplication of previous dimensions for correct misalignment
3976    calculation.  */
3977
3978 static tree
3979 vect_compute_array_base_alignment (tree array,
3980                                    tree vectype,
3981                                    tree *prev_dimensions,
3982                                    tree *misalignment)
3983 {
3984   tree index;
3985   tree domain;
3986   tree dimension_size;
3987   tree mis;
3988   tree bits_per_vectype;
3989   tree bits_per_vectype_unit;
3990
3991   /* The 'stop condition' of the recursion.  */
3992   if (TREE_CODE (array) != ARRAY_REF)
3993     return array;
3994
3995   if (!vectype)
3996     /* Just get the base decl.  */
3997     return vect_compute_array_base_alignment
3998                 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
3999
4000   if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
4001       !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
4002     return NULL_TREE;
4003
4004   domain = TYPE_DOMAIN (TREE_TYPE (array));
4005   dimension_size =
4006         int_const_binop (PLUS_EXPR,
4007                 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
4008                                              TYPE_MIN_VALUE (domain), 1),
4009                 size_one_node, 1);
4010
4011   /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4012      is a multiple of NUNITS:
4013
4014      dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4015    */
4016   mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4017          build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4018   if (integer_zerop (mis))
4019     /* This array is aligned. Continue just in order to get the base decl.  */
4020     return vect_compute_array_base_alignment
4021                 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4022
4023   index = TREE_OPERAND (array, 1);
4024   if (!host_integerp (index, 1))
4025     /* The current index is not constant.  */
4026     return NULL_TREE;
4027
4028   index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4029
4030   bits_per_vectype = fold_convert (unsigned_type_node,
4031     build_int_cst (NULL_TREE, BITS_PER_UNIT *
4032                  GET_MODE_SIZE (TYPE_MODE (vectype))));
4033   bits_per_vectype_unit =  fold_convert (unsigned_type_node,
4034     build_int_cst (NULL_TREE, BITS_PER_UNIT *
4035                  GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4036
4037   /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4038      earlier:
4039
4040      *misalignment =
4041        (*misalignment + index_val * dimension_size * *prev_dimensions)
4042                                                         % vectype_nunits;
4043    */
4044
4045   mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4046   mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4047   mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4048   mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4049   *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4050
4051
4052   *prev_dimensions = int_const_binop (MULT_EXPR,
4053                                 *prev_dimensions, dimension_size, 1);
4054
4055   return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4056                                             prev_dimensions,
4057                                             misalignment);
4058 }
4059
4060
4061 /* Function vect_compute_data_ref_alignment
4062
4063    Compute the misalignment of the data reference DR.
4064
4065    Output:
4066    1. If during the misalignment computation it is found that the data reference
4067       cannot be vectorized then false is returned.
4068    2. DR_MISALIGNMENT (DR) is defined.
4069
4070    FOR NOW: No analysis is actually performed. Misalignment is calculated
4071    only for trivial cases. TODO.  */
4072
4073 static bool
4074 vect_compute_data_ref_alignment (struct data_reference *dr,
4075                                  loop_vec_info loop_vinfo)
4076 {
4077   tree stmt = DR_STMT (dr);
4078   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4079   tree ref = DR_REF (dr);
4080   tree vectype;
4081   tree scalar_type;
4082   tree offset = size_zero_node;
4083   tree base, bit_offset, alignment;
4084   tree unit_bits = fold_convert (unsigned_type_node,
4085                                  build_int_cst (NULL_TREE, BITS_PER_UNIT));
4086   tree dr_base;
4087   bool base_aligned_p;
4088
4089   if (vect_debug_details (NULL))
4090     fprintf (dump_file, "vect_compute_data_ref_alignment:");
4091
4092   /* Initialize misalignment to unknown.  */
4093   DR_MISALIGNMENT (dr) = -1;
4094
4095   scalar_type = TREE_TYPE (ref);
4096   vectype = get_vectype_for_scalar_type (scalar_type);
4097   if (!vectype)
4098     {
4099       if (vect_debug_details (NULL))
4100         {
4101           fprintf (dump_file, "no vectype for stmt: ");
4102           print_generic_expr (dump_file, stmt, TDF_SLIM);
4103           fprintf (dump_file, " scalar_type: ");
4104           print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4105         }
4106       /* It is not possible to vectorize this data reference.  */
4107       return false;
4108     }
4109   STMT_VINFO_VECTYPE (stmt_info) = vectype;
4110   gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4111
4112   if (TREE_CODE (ref) == ARRAY_REF)
4113     dr_base = ref;
4114   else
4115     dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4116
4117   base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4118                           loop_vinfo, &bit_offset, &base_aligned_p);
4119   if (!base)
4120     {
4121       if (vect_debug_details (NULL))
4122         {
4123           fprintf (dump_file, "Unknown alignment for access: ");
4124           print_generic_expr (dump_file,
4125                               STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4126         }
4127       return true;
4128     }
4129
4130   if (!base_aligned_p)
4131     {
4132       if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4133         {
4134           if (vect_debug_details (NULL))
4135             {
4136               fprintf (dump_file, "can't force alignment of ref: ");
4137               print_generic_expr (dump_file, ref, TDF_SLIM);
4138             }
4139           return true;
4140         }
4141
4142       /* Force the alignment of the decl.
4143          NOTE: This is the only change to the code we make during
4144          the analysis phase, before deciding to vectorize the loop.  */
4145       if (vect_debug_details (NULL))
4146         fprintf (dump_file, "force alignment");
4147       DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4148       DECL_USER_ALIGN (base) = 1;
4149     }
4150
4151   /* At this point we assume that the base is aligned, and the offset from it
4152      (including index, if relevant) has been computed and is in BIT_OFFSET.  */
4153   gcc_assert (base_aligned_p
4154               || (TREE_CODE (base) == VAR_DECL
4155                   && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4156
4157   /* Convert into bytes.  */
4158   offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4159   /* Check that there is no remainder in bits.  */
4160   bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4161   if (!integer_zerop (bit_offset))
4162     {
4163       if (vect_debug_details (NULL))
4164         {
4165           fprintf (dump_file, "bit offset alignment: ");
4166           print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4167         }
4168       return false;
4169     }
4170
4171   /* Alignment required, in bytes:  */
4172   alignment = fold_convert (unsigned_type_node,
4173             build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4174
4175   /* Modulo alignment.  */
4176   offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4177   if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4178     {
4179       if (vect_debug_details (NULL))
4180         fprintf (dump_file, "unexpected misalign value");
4181       return false;
4182     }
4183
4184   DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4185
4186   if (vect_debug_details (NULL))
4187     fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4188
4189   return true;
4190 }
4191
4192
4193 /* Function vect_compute_array_ref_alignment
4194
4195    Compute the alignment of an array-ref.
4196    The alignment we compute here is relative to
4197    TYPE_ALIGN(VECTYPE) boundary.
4198
4199    Output:
4200    OFFSET - the alignment in bits
4201    Return value - the base of the array-ref. E.g,
4202                   if the array-ref is a.b[k].c[i][j] the returned
4203                   base is a.b[k].c
4204 */
4205
4206 static tree
4207 vect_compute_array_ref_alignment (struct data_reference *dr,
4208                                   loop_vec_info loop_vinfo,
4209                                   tree vectype,
4210                                   tree *offset)
4211 {
4212   tree array_first_index = size_zero_node;
4213   tree init;
4214   tree ref = DR_REF (dr);
4215   tree scalar_type = TREE_TYPE (ref);
4216   tree oprnd0 = TREE_OPERAND (ref, 0);
4217   tree dims = size_one_node;
4218   tree misalign = size_zero_node;
4219   tree next_ref, this_offset = size_zero_node;
4220   tree nunits;
4221   tree nbits;
4222
4223   if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4224     /* The reference is an array without its last index.  */
4225     next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4226                                                   &misalign);
4227   else
4228     next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4229                                                   &misalign);
4230   if (!vectype)
4231     /* Alignment is not requested. Just return the base.  */
4232     return next_ref;
4233
4234   /* Compute alignment.  */
4235   if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4236     return NULL_TREE;
4237   this_offset = misalign;
4238
4239   /* Check the first index accessed.  */
4240   if (!vect_get_first_index (ref, &array_first_index))
4241     {
4242       if (vect_debug_details (NULL))
4243         fprintf (dump_file, "no first_index for array.");
4244       return NULL_TREE;
4245     }
4246
4247   /* Check the index of the array_ref.  */
4248   init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4249                                         LOOP_VINFO_LOOP (loop_vinfo)->num);
4250
4251   /* FORNOW: In order to simplify the handling of alignment, we make sure
4252      that the first location at which the array is accessed ('init') is on an
4253      'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4254      This is too conservative, since we require that
4255      both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4256      NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4257      This should be relaxed in the future.  */
4258
4259   if (!init || !host_integerp (init, 0))
4260     {
4261       if (vect_debug_details (NULL))
4262         fprintf (dump_file, "non constant init. ");
4263       return NULL_TREE;
4264     }
4265
4266   /* bytes per scalar element: */
4267   nunits = fold_convert (unsigned_type_node,
4268         build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4269   nbits = int_const_binop (MULT_EXPR, nunits,
4270                            build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4271
4272   /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4273   misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4274   misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4275   misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4276
4277   /* TODO: allow negative misalign values.  */
4278   if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4279     {
4280       if (vect_debug_details (NULL))
4281         fprintf (dump_file, "unexpected misalign value");
4282       return NULL_TREE;
4283     }
4284   *offset = misalign;
4285   return next_ref;
4286 }
4287
4288
4289 /* Function vect_compute_data_refs_alignment
4290
4291    Compute the misalignment of data references in the loop.
4292    This pass may take place at function granularity instead of at loop
4293    granularity.
4294
4295    FOR NOW: No analysis is actually performed. Misalignment is calculated
4296    only for trivial cases. TODO.  */
4297
4298 static bool
4299 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4300 {
4301   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4302   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4303   unsigned int i;
4304
4305   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4306     {
4307       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4308       if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4309         return false;
4310     }
4311
4312   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4313     {
4314       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4315       if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4316         return false;
4317     }
4318
4319   return true;
4320 }
4321
4322
4323 /* Function vect_enhance_data_refs_alignment
4324
4325    This pass will use loop versioning and loop peeling in order to enhance
4326    the alignment of data references in the loop.
4327
4328    FOR NOW: we assume that whatever versioning/peeling takes place, only the
4329    original loop is to be vectorized; Any other loops that are created by
4330    the transformations performed in this pass - are not supposed to be
4331    vectorized. This restriction will be relaxed.  */
4332
4333 static void
4334 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4335 {
4336   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4337   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4338   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4339   unsigned int i;
4340
4341   /*
4342      This pass will require a cost model to guide it whether to apply peeling
4343      or versioning or a combination of the two. For example, the scheme that
4344      intel uses when given a loop with several memory accesses, is as follows:
4345      choose one memory access ('p') which alignment you want to force by doing
4346      peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4347      other accesses are not necessarily aligned, or (2) use loop versioning to
4348      generate one loop in which all accesses are aligned, and another loop in
4349      which only 'p' is necessarily aligned.
4350
4351      ("Automatic Intra-Register Vectorization for the Intel Architecture",
4352       Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4353       Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4354
4355      Devising a cost model is the most critical aspect of this work. It will
4356      guide us on which access to peel for, whether to use loop versioning, how
4357      many versions to create, etc. The cost model will probably consist of
4358      generic considerations as well as target specific considerations (on
4359      powerpc for example, misaligned stores are more painful than misaligned
4360      loads).
4361
4362      Here is the general steps involved in alignment enhancements:
4363
4364      -- original loop, before alignment analysis:
4365         for (i=0; i<N; i++){
4366           x = q[i];                     # DR_MISALIGNMENT(q) = unknown
4367           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
4368         }
4369
4370      -- After vect_compute_data_refs_alignment:
4371         for (i=0; i<N; i++){
4372           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4373           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
4374         }
4375
4376      -- Possibility 1: we do loop versioning:
4377      if (p is aligned) {
4378         for (i=0; i<N; i++){    # loop 1A
4379           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4380           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
4381         }
4382      }
4383      else {
4384         for (i=0; i<N; i++){    # loop 1B
4385           x = q[i];                     # DR_MISALIGNMENT(q) = 3
4386           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
4387         }
4388      }
4389
4390      -- Possibility 2: we do loop peeling:
4391      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
4392         x = q[i];
4393         p[i] = y;
4394      }
4395      for (i = 3; i < N; i++){   # loop 2A
4396         x = q[i];                       # DR_MISALIGNMENT(q) = 0
4397         p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
4398      }
4399
4400      -- Possibility 3: combination of loop peeling and versioning:
4401      for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
4402         x = q[i];
4403         p[i] = y;
4404      }
4405      if (p is aligned) {
4406         for (i = 3; i<N; i++){  # loop 3A
4407           x = q[i];                     # DR_MISALIGNMENT(q) = 0
4408           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
4409         }
4410      }
4411      else {
4412         for (i = 3; i<N; i++){  # loop 3B
4413           x = q[i];                     # DR_MISALIGNMENT(q) = 0
4414           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
4415         }
4416      }
4417
4418      These loops are later passed to loop_transform to be vectorized. The
4419      vectorizer will use the alignment information to guide the transformation
4420      (whether to generate regular loads/stores, or with special handling for
4421      misalignment).
4422    */
4423
4424   /* (1) Peeling to force alignment.  */
4425
4426   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4427      Considerations:
4428      + How many accesses will become aligned due to the peeling
4429      - How many accesses will become unaligned due to the peeling,
4430        and the cost of misaligned accesses.
4431      - The cost of peeling (the extra runtime checks, the increase
4432        in code size).
4433
4434      The scheme we use FORNOW: peel to force the alignment of the first
4435      misaligned store in the loop.
4436      Rationale: misaligned stores are not yet supported.
4437
4438      TODO: Use a better cost model.  */
4439
4440   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4441     {
4442       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4443       if (!aligned_access_p (dr))
4444         {
4445           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4446           LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4447           break;
4448         }
4449     }
4450
4451   if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4452     {
4453       if (vect_debug_details (loop))
4454         fprintf (dump_file, "Peeling for alignment will not be applied.");
4455       return;
4456     }
4457   else
4458     if (vect_debug_details (loop))
4459       fprintf (dump_file, "Peeling for alignment will be applied.");
4460
4461
4462   /* (1.2) Update the alignment info according to the peeling factor.
4463            If the misalignment of the DR we peel for is M, then the
4464            peeling factor is VF - M, and the misalignment of each access DR_i
4465            in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4466            If the misalignment of the DR we peel for is unknown, then the
4467            misalignment of each access DR_i in the loop is also unknown.
4468
4469            FORNOW: set the misalignment of the accesses to unknown even
4470                    if the peeling factor is known at compile time.
4471
4472            TODO: - if the peeling factor is known at compile time, use that
4473                    when updating the misalignment info of the loop DRs.
4474                  - consider accesses that are known to have the same
4475                    alignment, even if that alignment is unknown.  */
4476
4477   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4478     {
4479       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4480       if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4481         DR_MISALIGNMENT (dr) = 0;
4482       else
4483         DR_MISALIGNMENT (dr) = -1;
4484     }
4485   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4486     {
4487       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4488       if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4489         DR_MISALIGNMENT (dr) = 0;
4490       else
4491         DR_MISALIGNMENT (dr) = -1;
4492     }
4493 }
4494
4495
4496 /* Function vect_analyze_data_refs_alignment
4497
4498    Analyze the alignment of the data-references in the loop.
4499    FOR NOW: Until support for misliagned accesses is in place, only if all
4500    accesses are aligned can the loop be vectorized. This restriction will be
4501    relaxed.  */
4502
4503 static bool
4504 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4505 {
4506   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4507   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4508   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4509   enum dr_alignment_support supportable_dr_alignment;
4510   unsigned int i;
4511
4512   if (vect_debug_details (NULL))
4513     fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4514
4515
4516   /* This pass may take place at function granularity instead of at loop
4517      granularity.  */
4518
4519   if (!vect_compute_data_refs_alignment (loop_vinfo))
4520     {
4521       if (vect_debug_details (loop) || vect_debug_stats (loop))
4522         fprintf (dump_file,
4523                  "not vectorized: can't calculate alignment for data ref.");
4524       return false;
4525     }
4526
4527
4528   /* This pass will decide on using loop versioning and/or loop peeling in
4529      order to enhance the alignment of data references in the loop.  */
4530
4531   vect_enhance_data_refs_alignment (loop_vinfo);
4532
4533
4534   /* Finally, check that all the data references in the loop can be
4535      handled with respect to their alignment.  */
4536
4537   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4538     {
4539       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4540       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4541       if (!supportable_dr_alignment)
4542         {
4543           if (vect_debug_details (loop) || vect_debug_stats (loop))
4544             fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4545           return false;
4546         }
4547     }
4548   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4549     {
4550       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4551       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4552       if (!supportable_dr_alignment)
4553         {
4554           if (vect_debug_details (loop) || vect_debug_stats (loop))
4555             fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4556           return false;
4557         }
4558     }
4559
4560   return true;
4561 }
4562
4563
4564 /* Function vect_analyze_data_ref_access.
4565
4566    Analyze the access pattern of the data-reference DR. For now, a data access
4567    has to consecutive and aligned to be considered vectorizable.  */
4568
4569 static bool
4570 vect_analyze_data_ref_access (struct data_reference *dr)
4571 {
4572   varray_type access_fns = DR_ACCESS_FNS (dr);
4573   tree access_fn;
4574   tree init, step;
4575   unsigned int dimensions, i;
4576
4577   /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4578      i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4579      access is contiguous).  */
4580   dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4581
4582   for (i = 1; i < dimensions; i++) /* Not including the last dimension.  */
4583     {
4584       access_fn = DR_ACCESS_FN (dr, i);
4585
4586       if (evolution_part_in_loop_num (access_fn,
4587                                       loop_containing_stmt (DR_STMT (dr))->num))
4588         {
4589           /* Evolution part is not NULL in this loop (it is neither constant
4590              nor invariant).  */
4591           if (vect_debug_details (NULL))
4592             {
4593               fprintf (dump_file,
4594                        "not vectorized: complicated multidim. array access.");
4595               print_generic_expr (dump_file, access_fn, TDF_SLIM);
4596             }
4597           return false;
4598         }
4599     }
4600
4601   access_fn = DR_ACCESS_FN (dr, 0); /*  The last dimension access function.  */
4602   if (!evolution_function_is_constant_p (access_fn)
4603       && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4604                                        access_fn, &init, &step, true))
4605     {
4606       if (vect_debug_details (NULL))
4607         {
4608           fprintf (dump_file, "not vectorized: complicated access function.");
4609           print_generic_expr (dump_file, access_fn, TDF_SLIM);
4610         }
4611       return false;
4612     }
4613
4614   return true;
4615 }
4616
4617
4618 /* Function vect_analyze_data_ref_accesses.
4619
4620    Analyze the access pattern of all the data references in the loop.
4621
4622    FORNOW: the only access pattern that is considered vectorizable is a
4623            simple step 1 (consecutive) access.
4624
4625    FORNOW: handle only arrays and pointer accesses.  */
4626
4627 static bool
4628 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4629 {
4630   unsigned int i;
4631   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4632   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4633
4634   if (vect_debug_details (NULL))
4635     fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4636
4637   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4638     {
4639       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4640       bool ok = vect_analyze_data_ref_access (dr);
4641       if (!ok)
4642         {
4643           if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4644               || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4645             fprintf (dump_file, "not vectorized: complicated access pattern.");
4646           return false;
4647         }
4648     }
4649
4650   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4651     {
4652       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4653       bool ok = vect_analyze_data_ref_access (dr);
4654       if (!ok)
4655         {
4656           if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4657               || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4658             fprintf (dump_file, "not vectorized: complicated access pattern.");
4659           return false;
4660         }
4661     }
4662
4663   return true;
4664 }
4665
4666
4667 /* Function vect_analyze_pointer_ref_access.
4668
4669    Input:
4670    STMT - a stmt that contains a data-ref
4671    MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4672
4673    If the data-ref access is vectorizable, return a data_reference structure
4674    that represents it (DR). Otherwise - return NULL.  */
4675
4676 static struct data_reference *
4677 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4678 {
4679   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4680   struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4681   tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4682   tree init, step;
4683   int step_val;
4684   tree reftype, innertype;
4685   enum machine_mode innermode;
4686   tree indx_access_fn;
4687   int loopnum = loop->num;
4688   struct data_reference *dr;
4689
4690   if (!access_fn)
4691     {
4692       if (vect_debug_stats (loop) || vect_debug_details (loop))
4693         fprintf (dump_file, "not vectorized: complicated pointer access.");
4694       return NULL;
4695     }
4696
4697   if (vect_debug_details (NULL))
4698     {
4699       fprintf (dump_file, "Access function of ptr: ");
4700       print_generic_expr (dump_file, access_fn, TDF_SLIM);
4701     }
4702
4703   if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4704     {
4705       if (vect_debug_stats (loop) || vect_debug_details (loop))
4706         fprintf (dump_file, "not vectorized: pointer access is not simple.");
4707       return NULL;
4708     }
4709
4710   STRIP_NOPS (init);
4711
4712   if (!host_integerp (step,0))
4713     {
4714       if (vect_debug_stats (loop) || vect_debug_details (loop))
4715         fprintf (dump_file,
4716                 "not vectorized: non constant step for pointer access.");
4717       return NULL;
4718     }
4719
4720   step_val = TREE_INT_CST_LOW (step);
4721
4722   reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4723   if (TREE_CODE (reftype) != POINTER_TYPE)
4724     {
4725       if (vect_debug_stats (loop) || vect_debug_details (loop))
4726         fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4727       return NULL;
4728     }
4729
4730   reftype = TREE_TYPE (init);
4731   if (TREE_CODE (reftype) != POINTER_TYPE)
4732     {
4733       if (vect_debug_stats (loop) || vect_debug_details (loop))
4734         fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4735       return NULL;
4736     }
4737
4738   innertype = TREE_TYPE (reftype);
4739   innermode = TYPE_MODE (innertype);
4740   if (GET_MODE_SIZE (innermode) != step_val)
4741     {
4742       /* FORNOW: support only consecutive access */
4743       if (vect_debug_stats (loop) || vect_debug_details (loop))
4744         fprintf (dump_file, "not vectorized: non consecutive access.");
4745       return NULL;
4746     }
4747
4748   indx_access_fn =
4749         build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4750   if (vect_debug_details (NULL))
4751     {
4752       fprintf (dump_file, "Access function of ptr indx: ");
4753       print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4754     }
4755   dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4756   return dr;
4757 }
4758
4759
4760 /* Function vect_get_symbl_and_dr.
4761
4762    The function returns SYMBL - the relevant variable for
4763    memory tag (for aliasing purposes).
4764    Also data reference structure DR is created.
4765
4766    Input:
4767    MEMREF - data reference in STMT
4768    IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4769
4770    Output:
4771    DR - data_reference struct for MEMREF
4772    return value - the relevant variable for memory tag (for aliasing purposes).
4773
4774 */
4775
4776 static tree
4777 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4778                        loop_vec_info loop_vinfo, struct data_reference **dr)
4779 {
4780   tree symbl, oprnd0, oprnd1;
4781   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4782   tree offset;
4783   tree array_base, base;
4784   struct data_reference *new_dr;
4785   bool base_aligned_p;
4786
4787   *dr = NULL;
4788   switch (TREE_CODE (memref))
4789     {
4790     case INDIRECT_REF:
4791       new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4792       if (! new_dr)
4793         return NULL_TREE;
4794       *dr = new_dr;
4795       symbl = DR_BASE_NAME (new_dr);
4796       STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4797
4798       switch (TREE_CODE (symbl))
4799         {
4800         case PLUS_EXPR:
4801         case MINUS_EXPR:
4802           oprnd0 = TREE_OPERAND (symbl, 0);
4803           oprnd1 = TREE_OPERAND (symbl, 1);
4804
4805           STRIP_NOPS(oprnd1);
4806           /* Only {address_base + offset} expressions are supported,
4807              where address_base can be POINTER_TYPE or ARRAY_TYPE and
4808              offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4809              TODO: swap operands if {offset + address_base}.  */
4810           if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4811                && TREE_CODE (oprnd1) != INTEGER_CST)
4812               || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4813             return NULL_TREE;
4814
4815           if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4816             symbl = oprnd0;
4817           else
4818             symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4819                                            loop_vinfo, &new_dr);
4820
4821         case SSA_NAME:
4822         case ADDR_EXPR:
4823           /* symbl remains unchanged.  */
4824           break;
4825
4826         default:
4827           if (vect_debug_details (NULL))
4828             {
4829               fprintf (dump_file, "unhandled data ref: ");
4830               print_generic_expr (dump_file, memref, TDF_SLIM);
4831               fprintf (dump_file, " (symbl ");
4832               print_generic_expr (dump_file, symbl, TDF_SLIM);
4833               fprintf (dump_file, ") in stmt  ");
4834               print_generic_expr (dump_file, stmt, TDF_SLIM);
4835             }
4836           return NULL_TREE;
4837         }
4838       break;
4839
4840     case ARRAY_REF:
4841       offset = size_zero_node;
4842
4843       /* Store the array base in the stmt info.
4844          For one dimensional array ref a[i], the base is a,
4845          for multidimensional a[i1][i2]..[iN], the base is
4846          a[i1][i2]..[iN-1].  */
4847       array_base = TREE_OPERAND (memref, 0);
4848       STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4849
4850       new_dr = analyze_array (stmt, memref, is_read);
4851       *dr = new_dr;
4852
4853       /* Find the relevant symbol for aliasing purposes.  */
4854       base = DR_BASE_NAME (new_dr);
4855       switch (TREE_CODE (base))
4856         {
4857         case VAR_DECL:
4858           symbl = base;
4859           break;
4860
4861         case INDIRECT_REF:
4862           symbl = TREE_OPERAND (base, 0);
4863           break;
4864
4865         case COMPONENT_REF:
4866           /* Could have recorded more accurate information -
4867              i.e, the actual FIELD_DECL that is being referenced -
4868              but later passes expect VAR_DECL as the nmt.  */
4869           symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4870                                         loop_vinfo, &offset, &base_aligned_p);
4871           if (symbl)
4872             break;
4873           /* fall through */
4874         default:
4875           if (vect_debug_details (NULL))
4876             {
4877               fprintf (dump_file, "unhandled struct/class field access ");
4878               print_generic_expr (dump_file, stmt, TDF_SLIM);
4879             }
4880           return NULL_TREE;
4881         }
4882       break;
4883
4884     default:
4885       if (vect_debug_details (NULL))
4886         {
4887           fprintf (dump_file, "unhandled data ref: ");
4888           print_generic_expr (dump_file, memref, TDF_SLIM);
4889           fprintf (dump_file, " in stmt  ");
4890           print_generic_expr (dump_file, stmt, TDF_SLIM);
4891         }
4892       return NULL_TREE;
4893     }
4894   return symbl;
4895 }
4896
4897
4898 /* Function vect_analyze_data_refs.
4899
4900    Find all the data references in the loop.
4901
4902    FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4903            which base is really an array (not a pointer) and which alignment
4904            can be forced. This restriction will be relaxed.  */
4905
4906 static bool
4907 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4908 {
4909   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4910   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4911   int nbbs = loop->num_nodes;
4912   block_stmt_iterator si;
4913   int j;
4914   struct data_reference *dr;
4915   tree tag;
4916   tree address_base;
4917   bool base_aligned_p;
4918   tree offset;
4919
4920   if (vect_debug_details (NULL))
4921     fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4922
4923   for (j = 0; j < nbbs; j++)
4924     {
4925       basic_block bb = bbs[j];
4926       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4927         {
4928           bool is_read = false;
4929           tree stmt = bsi_stmt (si);
4930           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4931           v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4932           v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4933           vuse_optype vuses = STMT_VUSE_OPS (stmt);
4934           varray_type *datarefs = NULL;
4935           int nvuses, nv_may_defs, nv_must_defs;
4936           tree memref = NULL;
4937           tree symbl;
4938
4939           /* Assumption: there exists a data-ref in stmt, if and only if
4940              it has vuses/vdefs.  */
4941
4942           if (!vuses && !v_may_defs && !v_must_defs)
4943             continue;
4944
4945           nvuses = NUM_VUSES (vuses);
4946           nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4947           nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4948
4949           if (nvuses && (nv_may_defs || nv_must_defs))
4950             {
4951               if (vect_debug_details (NULL))
4952                 {
4953                   fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4954                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4955                 }
4956               return false;
4957             }
4958
4959           if (TREE_CODE (stmt) != MODIFY_EXPR)
4960             {
4961               if (vect_debug_details (NULL))
4962                 {
4963                   fprintf (dump_file, "unexpected vops in stmt: ");
4964                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4965                 }
4966               return false;
4967             }
4968
4969           if (vuses)
4970             {
4971               memref = TREE_OPERAND (stmt, 1);
4972               datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4973               is_read = true;
4974             }
4975           else /* vdefs */
4976             {
4977               memref = TREE_OPERAND (stmt, 0);
4978               datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4979               is_read = false;
4980             }
4981
4982           /* Analyze MEMREF. If it is of a supported form, build data_reference
4983              struct for it (DR) and find the relevant symbol for aliasing
4984              purposes.  */
4985           symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
4986                                          &dr);
4987           if (!symbl)
4988             {
4989               if (vect_debug_stats (loop) || vect_debug_details (loop))
4990                 {
4991                   fprintf (dump_file, "not vectorized: unhandled data ref: ");
4992                   print_generic_expr (dump_file, stmt, TDF_SLIM);
4993                 }
4994               return false;
4995             }
4996
4997           /* Find and record the memtag assigned to this data-ref.  */
4998            switch (TREE_CODE (symbl))
4999             {
5000             case VAR_DECL:
5001               STMT_VINFO_MEMTAG (stmt_info) = symbl;
5002               break;
5003
5004             case SSA_NAME:
5005               symbl = SSA_NAME_VAR (symbl);
5006               tag = get_var_ann (symbl)->type_mem_tag;
5007               if (!tag)
5008                 {
5009                   tree ptr = TREE_OPERAND (memref, 0);
5010                   if (TREE_CODE (ptr) == SSA_NAME)
5011                     tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
5012                 }
5013               if (!tag)
5014                 {
5015                   if (vect_debug_stats (loop) || vect_debug_details (loop))
5016                     fprintf (dump_file, "not vectorized: no memtag for ref.");
5017                   return false;
5018                 }
5019               STMT_VINFO_MEMTAG (stmt_info) = tag;
5020               break;
5021
5022             case ADDR_EXPR:
5023               address_base = TREE_OPERAND (symbl, 0);
5024
5025               switch (TREE_CODE (address_base))
5026                 {
5027                 case ARRAY_REF:
5028                   {
5029                     struct data_reference *tmp_dr;
5030
5031                     tmp_dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5032                                             DR_IS_READ (dr));
5033                     tag = vect_get_base_and_bit_offset
5034                       (tmp_dr, DR_BASE_NAME (tmp_dr),
5035                        NULL_TREE, loop_vinfo, &offset, &base_aligned_p);
5036                     if (!tag)
5037                       {
5038                         if (vect_debug_stats (loop)
5039                             || vect_debug_details (loop))
5040                           fprintf (dump_file,
5041                                    "not vectorized: no memtag for ref.");
5042                         return false;
5043                       }
5044                     STMT_VINFO_MEMTAG (stmt_info) = tag;
5045                   }
5046
5047                   break;
5048
5049                 case VAR_DECL:
5050                   STMT_VINFO_MEMTAG (stmt_info) = address_base;
5051                   break;
5052
5053                 default:
5054                   if (vect_debug_stats (loop) || vect_debug_details (loop))
5055                     {
5056                       fprintf (dump_file,
5057                                "not vectorized: unhandled address expr: ");
5058                       print_generic_expr (dump_file, stmt, TDF_SLIM);
5059                     }
5060                   return false;
5061                 }
5062               break;
5063
5064             default:
5065               if (vect_debug_stats (loop) || vect_debug_details (loop))
5066                 {
5067                   fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5068                   print_generic_expr (dump_file, memref, TDF_SLIM);
5069                 }
5070               return false;
5071             }
5072
5073           VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5074           STMT_VINFO_DATA_REF (stmt_info) = dr;
5075         }
5076     }
5077
5078   return true;
5079 }
5080
5081
5082 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
5083
5084 /* Function vect_mark_relevant.
5085
5086    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
5087
5088 static void
5089 vect_mark_relevant (varray_type worklist, tree stmt)
5090 {
5091   stmt_vec_info stmt_info;
5092
5093   if (vect_debug_details (NULL))
5094     fprintf (dump_file, "mark relevant.");
5095
5096   if (TREE_CODE (stmt) == PHI_NODE)
5097     {
5098       VARRAY_PUSH_TREE (worklist, stmt);
5099       return;
5100     }
5101
5102   stmt_info = vinfo_for_stmt (stmt);
5103
5104   if (!stmt_info)
5105     {
5106       if (vect_debug_details (NULL))
5107         {
5108           fprintf (dump_file, "mark relevant: no stmt info!!.");
5109           print_generic_expr (dump_file, stmt, TDF_SLIM);
5110         }
5111       return;
5112     }
5113
5114   if (STMT_VINFO_RELEVANT_P (stmt_info))
5115     {
5116       if (vect_debug_details (NULL))
5117         fprintf (dump_file, "already marked relevant.");
5118       return;
5119     }
5120
5121   STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5122   VARRAY_PUSH_TREE (worklist, stmt);
5123 }
5124
5125
5126 /* Function vect_stmt_relevant_p.
5127
5128    Return true if STMT in loop that is represented by LOOP_VINFO is
5129    "relevant for vectorization".
5130
5131    A stmt is considered "relevant for vectorization" if:
5132    - it has uses outside the loop.
5133    - it has vdefs (it alters memory).
5134    - control stmts in the loop (except for the exit condition).
5135
5136    CHECKME: what other side effects would the vectorizer allow?  */
5137
5138 static bool
5139 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5140 {
5141   v_may_def_optype v_may_defs;
5142   v_must_def_optype v_must_defs;
5143   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5144   int i;
5145   dataflow_t df;
5146   int num_uses;
5147
5148   /* cond stmt other than loop exit cond.  */
5149   if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5150     return true;
5151
5152   /* changing memory.  */
5153   v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5154   v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5155   if (v_may_defs || v_must_defs)
5156     {
5157       if (vect_debug_details (NULL))
5158         fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5159       return true;
5160     }
5161
5162   /* uses outside the loop.  */
5163   df = get_immediate_uses (stmt);
5164   num_uses = num_immediate_uses (df);
5165   for (i = 0; i < num_uses; i++)
5166     {
5167       tree use = immediate_use (df, i);
5168       basic_block bb = bb_for_stmt (use);
5169       if (!flow_bb_inside_loop_p (loop, bb))
5170         {
5171           if (vect_debug_details (NULL))
5172             fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5173           return true;
5174         }
5175     }
5176
5177   return false;
5178 }
5179
5180
5181 /* Function vect_mark_stmts_to_be_vectorized.
5182
5183    Not all stmts in the loop need to be vectorized. For example:
5184
5185      for i...
5186        for j...
5187    1.    T0 = i + j
5188    2.    T1 = a[T0]
5189
5190    3.    j = j + 1
5191
5192    Stmt 1 and 3 do not need to be vectorized, because loop control and
5193    addressing of vectorized data-refs are handled differently.
5194
5195    This pass detects such stmts.  */
5196
5197 static bool
5198 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5199 {
5200   varray_type worklist;
5201   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5202   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5203   unsigned int nbbs = loop->num_nodes;
5204   block_stmt_iterator si;
5205   tree stmt;
5206   stmt_ann_t ann;
5207   unsigned int i;
5208   int j;
5209   use_optype use_ops;
5210   stmt_vec_info stmt_info;
5211
5212   if (vect_debug_details (NULL))
5213     fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5214
5215   VARRAY_TREE_INIT (worklist, 64, "work list");
5216
5217   /* 1. Init worklist.  */
5218
5219   for (i = 0; i < nbbs; i++)
5220     {
5221       basic_block bb = bbs[i];
5222       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5223         {
5224           stmt = bsi_stmt (si);
5225
5226           if (vect_debug_details (NULL))
5227             {
5228               fprintf (dump_file, "init: stmt relevant? ");
5229               print_generic_expr (dump_file, stmt, TDF_SLIM);
5230             }
5231
5232           stmt_info = vinfo_for_stmt (stmt);
5233           STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5234
5235           if (vect_stmt_relevant_p (stmt, loop_vinfo))
5236             vect_mark_relevant (worklist, stmt);
5237         }
5238     }
5239
5240
5241   /* 2. Process_worklist */
5242
5243   while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5244     {
5245       stmt = VARRAY_TOP_TREE (worklist);
5246       VARRAY_POP (worklist);
5247
5248       if (vect_debug_details (NULL))
5249         {
5250           fprintf (dump_file, "worklist: examine stmt: ");
5251           print_generic_expr (dump_file, stmt, TDF_SLIM);
5252         }
5253
5254       /* Examine the USES in this statement. Mark all the statements which
5255          feed this statement's uses as "relevant", unless the USE is used as
5256          an array index.  */
5257
5258       if (TREE_CODE (stmt) == PHI_NODE)
5259         {
5260           /* follow the def-use chain inside the loop.  */
5261           for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5262             {
5263               tree arg = PHI_ARG_DEF (stmt, j);
5264               tree def_stmt = NULL_TREE;
5265               basic_block bb;
5266               if (!vect_is_simple_use (arg, loop, &def_stmt))
5267                 {
5268                   if (vect_debug_details (NULL))
5269                     fprintf (dump_file, "worklist: unsupported use.");
5270                   varray_clear (worklist);
5271                   return false;
5272                 }
5273               if (!def_stmt)
5274                 continue;
5275
5276               if (vect_debug_details (NULL))
5277                 {
5278                   fprintf (dump_file, "worklist: def_stmt: ");
5279                   print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5280                 }
5281
5282               bb = bb_for_stmt (def_stmt);
5283               if (flow_bb_inside_loop_p (loop, bb))
5284                 vect_mark_relevant (worklist, def_stmt);
5285             }
5286         }
5287
5288       ann = stmt_ann (stmt);
5289       use_ops = USE_OPS (ann);
5290
5291       for (i = 0; i < NUM_USES (use_ops); i++)
5292         {
5293           tree use = USE_OP (use_ops, i);
5294
5295           /* We are only interested in uses that need to be vectorized. Uses
5296              that are used for address computation are not considered relevant.
5297            */
5298           if (exist_non_indexing_operands_for_use_p (use, stmt))
5299             {
5300               tree def_stmt = NULL_TREE;
5301               basic_block bb;
5302               if (!vect_is_simple_use (use, loop, &def_stmt))
5303                 {
5304                   if (vect_debug_details (NULL))
5305                     fprintf (dump_file, "worklist: unsupported use.");
5306                   varray_clear (worklist);
5307                   return false;
5308                 }
5309
5310               if (!def_stmt)
5311                 continue;
5312
5313               if (vect_debug_details (NULL))
5314                 {
5315                   fprintf (dump_file, "worklist: examine use %d: ", i);
5316                   print_generic_expr (dump_file, use, TDF_SLIM);
5317                 }
5318
5319               bb = bb_for_stmt (def_stmt);
5320               if (flow_bb_inside_loop_p (loop, bb))
5321                 vect_mark_relevant (worklist, def_stmt);
5322             }
5323         }
5324     }                           /* while worklist */
5325
5326   varray_clear (worklist);
5327   return true;
5328 }
5329
5330
5331 /* Function vect_can_advance_ivs_p
5332
5333    In case the number of iterations that LOOP iterates in unknown at compile
5334    time, an epilog loop will be generated, and the loop induction variables
5335    (IVs) will be "advanced" to the value they are supposed to take just before
5336    the epilog loop.  Here we check that the access function of the loop IVs
5337    and the expression that represents the loop bound are simple enough.
5338    These restrictions will be relaxed in the future.  */
5339
5340 static bool
5341 vect_can_advance_ivs_p (struct loop *loop)
5342 {
5343   basic_block bb = loop->header;
5344   tree phi;
5345
5346   /* Analyze phi functions of the loop header.  */
5347
5348   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5349     {
5350       tree access_fn = NULL;
5351       tree evolution_part;
5352
5353       if (vect_debug_details (NULL))
5354         {
5355           fprintf (dump_file, "Analyze phi: ");
5356           print_generic_expr (dump_file, phi, TDF_SLIM);
5357         }
5358
5359       /* Skip virtual phi's. The data dependences that are associated with
5360          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
5361
5362       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5363         {
5364           if (vect_debug_details (NULL))
5365             fprintf (dump_file, "virtual phi. skip.");
5366           continue;
5367         }
5368
5369       /* Analyze the evolution function.  */
5370
5371       access_fn = instantiate_parameters
5372         (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5373
5374       if (!access_fn)
5375         {
5376           if (vect_debug_details (NULL))
5377             fprintf (dump_file, "No Access function.");
5378           return false;
5379         }
5380
5381       if (vect_debug_details (NULL))
5382         {
5383           fprintf (dump_file, "Access function of PHI: ");
5384           print_generic_expr (dump_file, access_fn, TDF_SLIM);
5385         }
5386
5387       evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5388
5389       if (evolution_part == NULL_TREE)
5390         return false;
5391
5392       /* FORNOW: We do not transform initial conditions of IVs
5393          which evolution functions are a polynomial of degree >= 2.  */
5394
5395       if (tree_is_chrec (evolution_part))
5396         return false;
5397     }
5398
5399   return true;
5400 }
5401
5402
5403 /* Function vect_get_loop_niters.
5404
5405    Determine how many iterations the loop is executed.
5406    If an expression that represents the number of iterations
5407    can be constructed, place it in NUMBER_OF_ITERATIONS.
5408    Return the loop exit condition.  */
5409
5410 static tree
5411 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5412 {
5413   tree niters;
5414
5415   if (vect_debug_details (NULL))
5416     fprintf (dump_file, "\n<<get_loop_niters>>\n");
5417
5418   niters = number_of_iterations_in_loop (loop);
5419
5420   if (niters != NULL_TREE
5421       && niters != chrec_dont_know)
5422     {
5423       *number_of_iterations = niters;
5424
5425       if (vect_debug_details (NULL))
5426         {
5427           fprintf (dump_file, "==> get_loop_niters:" );
5428           print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5429         }
5430     }
5431
5432   return get_loop_exit_condition (loop);
5433 }
5434
5435
5436 /* Function vect_analyze_loop_form.
5437
5438    Verify the following restrictions (some may be relaxed in the future):
5439    - it's an inner-most loop
5440    - number of BBs = 2 (which are the loop header and the latch)
5441    - the loop has a pre-header
5442    - the loop has a single entry and exit
5443    - the loop exit condition is simple enough, and the number of iterations
5444      can be analyzed (a countable loop).  */
5445
5446 static loop_vec_info
5447 vect_analyze_loop_form (struct loop *loop)
5448 {
5449   loop_vec_info loop_vinfo;
5450   tree loop_cond;
5451   tree number_of_iterations = NULL;
5452   bool rescan = false;
5453
5454   if (vect_debug_details (loop))
5455     fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5456
5457   if (loop->inner
5458       || !loop->single_exit
5459       || loop->num_nodes != 2
5460       || EDGE_COUNT (loop->header->preds) != 2
5461       || loop->num_entries != 1)
5462     {
5463       if (vect_debug_stats (loop) || vect_debug_details (loop))
5464         {
5465           fprintf (dump_file, "not vectorized: bad loop form. ");
5466           if (loop->inner)
5467             fprintf (dump_file, "nested loop.");
5468           else if (!loop->single_exit)
5469             fprintf (dump_file, "multiple exits.");
5470           else if (loop->num_nodes != 2)
5471             fprintf (dump_file, "too many BBs in loop.");
5472           else if (EDGE_COUNT (loop->header->preds) != 2)
5473             fprintf (dump_file, "too many incoming edges.");
5474           else if (loop->num_entries != 1)
5475             fprintf (dump_file, "too many entries.");
5476         }
5477
5478       return NULL;
5479     }
5480
5481   /* We assume that the loop exit condition is at the end of the loop. i.e,
5482      that the loop is represented as a do-while (with a proper if-guard
5483      before the loop if needed), where the loop header contains all the
5484      executable statements, and the latch is empty.  */
5485   if (!empty_block_p (loop->latch))
5486     {
5487       if (vect_debug_stats (loop) || vect_debug_details (loop))
5488         fprintf (dump_file, "not vectorized: unexpectd loop form.");
5489       return NULL;
5490     }
5491
5492   /* Make sure we have a preheader basic block.  */
5493   if (!loop->pre_header)
5494     {
5495       rescan = true;
5496       loop_split_edge_with (loop_preheader_edge (loop), NULL);
5497     }
5498
5499   /* Make sure there exists a single-predecessor exit bb:  */
5500   if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5501     {
5502       rescan = true;
5503       loop_split_edge_with (loop->exit_edges[0], NULL);
5504     }
5505
5506   if (rescan)
5507     {
5508       flow_loop_scan (loop, LOOP_ALL);
5509       /* Flow loop scan does not update loop->single_exit field.  */
5510       loop->single_exit = loop->exit_edges[0];
5511     }
5512
5513   if (empty_block_p (loop->header))
5514     {
5515       if (vect_debug_stats (loop) || vect_debug_details (loop))
5516         fprintf (dump_file, "not vectorized: empty loop.");
5517       return NULL;
5518     }
5519
5520   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5521   if (!loop_cond)
5522     {
5523       if (vect_debug_stats (loop) || vect_debug_details (loop))
5524         fprintf (dump_file, "not vectorized: complicated exit condition.");
5525       return NULL;
5526     }
5527
5528   if (!number_of_iterations)
5529     {
5530       if (vect_debug_stats (loop) || vect_debug_details (loop))
5531         fprintf (dump_file,
5532                  "not vectorized: number of iterations cannot be computed.");
5533       return NULL;
5534     }
5535
5536   if (chrec_contains_undetermined (number_of_iterations))
5537     {
5538       if (vect_debug_details (NULL))
5539         fprintf (dump_file, "Infinite number of iterations.");
5540       return false;
5541     }
5542
5543   loop_vinfo = new_loop_vec_info (loop);
5544   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5545
5546   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5547     {
5548       if (vect_debug_details (loop))
5549         {
5550           fprintf (dump_file, "loop bound unknown.\n");
5551           fprintf (dump_file, "Symbolic number of iterations is ");
5552           print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5553         }
5554     }
5555   else
5556   if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5557     {
5558       if (vect_debug_stats (loop) || vect_debug_details (loop))
5559         fprintf (dump_file, "not vectorized: number of iterations = 0.");
5560       return NULL;
5561     }
5562
5563   LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5564
5565   return loop_vinfo;
5566 }
5567
5568
5569 /* Function vect_analyze_loop.
5570
5571    Apply a set of analyses on LOOP, and create a loop_vec_info struct
5572    for it. The different analyses will record information in the
5573    loop_vec_info struct.  */
5574
5575 static loop_vec_info
5576 vect_analyze_loop (struct loop *loop)
5577 {
5578   bool ok;
5579   loop_vec_info loop_vinfo;
5580
5581   if (vect_debug_details (NULL))
5582     fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5583
5584   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
5585
5586   loop_vinfo = vect_analyze_loop_form (loop);
5587   if (!loop_vinfo)
5588     {
5589       if (vect_debug_details (loop))
5590         fprintf (dump_file, "bad loop form.");
5591       return NULL;
5592     }
5593
5594   /* Find all data references in the loop (which correspond to vdefs/vuses)
5595      and analyze their evolution in the loop.
5596
5597      FORNOW: Handle only simple, array references, which
5598      alignment can be forced, and aligned pointer-references.  */
5599
5600   ok = vect_analyze_data_refs (loop_vinfo);
5601   if (!ok)
5602     {
5603       if (vect_debug_details (loop))
5604         fprintf (dump_file, "bad data references.");
5605       destroy_loop_vec_info (loop_vinfo);
5606       return NULL;
5607     }
5608
5609   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
5610
5611   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5612   if (!ok)
5613     {
5614       if (vect_debug_details (loop))
5615         fprintf (dump_file, "unexpected pattern.");
5616       if (vect_debug_details (loop))
5617         fprintf (dump_file, "not vectorized: unexpected pattern.");
5618       destroy_loop_vec_info (loop_vinfo);
5619       return NULL;
5620     }
5621
5622   /* Check that all cross-iteration scalar data-flow cycles are OK.
5623      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
5624
5625   ok = vect_analyze_scalar_cycles (loop_vinfo);
5626   if (!ok)
5627     {
5628       if (vect_debug_details (loop))
5629         fprintf (dump_file, "bad scalar cycle.");
5630       destroy_loop_vec_info (loop_vinfo);
5631       return NULL;
5632     }
5633
5634   /* Analyze data dependences between the data-refs in the loop.
5635      FORNOW: fail at the first data dependence that we encounter.  */
5636
5637   ok = vect_analyze_data_ref_dependences (loop_vinfo);
5638   if (!ok)
5639     {
5640       if (vect_debug_details (loop))
5641         fprintf (dump_file, "bad data dependence.");
5642       destroy_loop_vec_info (loop_vinfo);
5643       return NULL;
5644     }
5645
5646   /* Analyze the access patterns of the data-refs in the loop (consecutive,
5647      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
5648
5649   ok = vect_analyze_data_ref_accesses (loop_vinfo);
5650   if (!ok)
5651     {
5652       if (vect_debug_details (loop))
5653         fprintf (dump_file, "bad data access.");
5654       destroy_loop_vec_info (loop_vinfo);
5655       return NULL;
5656     }
5657
5658   /* Analyze the alignment of the data-refs in the loop.
5659      FORNOW: Only aligned accesses are handled.  */
5660
5661   ok = vect_analyze_data_refs_alignment (loop_vinfo);
5662   if (!ok)
5663     {
5664       if (vect_debug_details (loop))
5665         fprintf (dump_file, "bad data alignment.");
5666       destroy_loop_vec_info (loop_vinfo);
5667       return NULL;
5668     }
5669
5670   /* Scan all the operations in the loop and make sure they are
5671      vectorizable.  */
5672
5673   ok = vect_analyze_operations (loop_vinfo);
5674   if (!ok)
5675     {
5676       if (vect_debug_details (loop))
5677         fprintf (dump_file, "bad operation or unsupported loop bound.");
5678       destroy_loop_vec_info (loop_vinfo);
5679       return NULL;
5680     }
5681
5682   LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5683
5684   return loop_vinfo;
5685 }
5686
5687
5688 /* Function need_imm_uses_for.
5689
5690    Return whether we ought to include information for 'var'
5691    when calculating immediate uses.  For this pass we only want use
5692    information for non-virtual variables.  */
5693
5694 static bool
5695 need_imm_uses_for (tree var)
5696 {
5697   return is_gimple_reg (var);
5698 }
5699
5700
5701 /* Function vectorize_loops.
5702
5703    Entry Point to loop vectorization phase.  */
5704
5705 void
5706 vectorize_loops (struct loops *loops)
5707 {
5708   unsigned int i, loops_num;
5709   unsigned int num_vectorized_loops = 0;
5710
5711   /* Does the target support SIMD?  */
5712   /* FORNOW: until more sophisticated machine modelling is in place.  */
5713   if (!UNITS_PER_SIMD_WORD)
5714     {
5715       if (vect_debug_details (NULL))
5716         fprintf (dump_file, "vectorizer: target vector size is not defined.");
5717       return;
5718     }
5719
5720 #ifdef ENABLE_CHECKING
5721   verify_loop_closed_ssa ();
5722 #endif
5723
5724   compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5725
5726   /*  ----------- Analyze loops. -----------  */
5727
5728   /* If some loop was duplicated, it gets bigger number
5729      than all previously defined loops. This fact allows us to run
5730      only over initial loops skipping newly generated ones.  */
5731   loops_num = loops->num;
5732   for (i = 1; i < loops_num; i++)
5733     {
5734       loop_vec_info loop_vinfo;
5735       struct loop *loop = loops->parray[i];
5736
5737       if (!loop)
5738         continue;
5739
5740       loop_vinfo = vect_analyze_loop (loop);
5741       loop->aux = loop_vinfo;
5742
5743       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5744         continue;
5745
5746       vect_transform_loop (loop_vinfo, loops);
5747       num_vectorized_loops++;
5748     }
5749
5750   if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5751     fprintf (dump_file, "\nvectorized %u loops in function.\n",
5752              num_vectorized_loops);
5753
5754   /*  ----------- Finalize. -----------  */
5755
5756   free_df ();
5757   for (i = 1; i < loops_num; i++)
5758     {
5759       struct loop *loop = loops->parray[i];
5760       loop_vec_info loop_vinfo;
5761
5762       if (!loop)
5763         continue;
5764       loop_vinfo = loop->aux;
5765       destroy_loop_vec_info (loop_vinfo);
5766       loop->aux = NULL;
5767     }
5768
5769   rewrite_into_ssa (false);
5770   rewrite_into_loop_closed_ssa (); /* FORNOW */
5771   bitmap_clear (vars_to_rename);
5772 }