+ /* Make sure bsi points to the stmt that is being vectorized. */
+ gcc_assert (stmt == bsi_stmt (*bsi));
+
+#ifdef USE_MAPPED_LOCATION
+ SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
+#else
+ SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
+#endif
+}
+
+
+#define ADJUST_IN_EPILOG 1
+
+/* Function get_initial_def_for_reduction
+
+   Build the vector that seeds the vector of partial results of a
+   vectorized reduction.
+
+   Input:
+   STMT - the stmt inside the loop that performs the reduction operation.
+   INIT_VAL - the value the reduction variable holds before the loop.
+
+   Output:
+   SCALAR_DEF - set to INIT_VAL when the caller still has to fold INIT_VAL
+        into the final result (the "ADJUST_IN_EPILOG" scheme described
+        below), and to NULL_TREE when no such adjustment is required.
+   The return value is whatever vect_init_vector produces for the initial
+   vector built here.
+
+   Two ways of choosing the initial vector are considered:
+
+   Option1 ("ADJUST_IN_EPILOG"):
+     add:        [0,0,...,0,0]
+     mult:       [1,1,...,1,1]
+     min/max:    [init_val,init_val,..,init_val,init_val]
+     bit and/or: [init_val,init_val,..,init_val,init_val]
+   and, where required (e.g. the add/mult cases), the caller is told to
+   adjust the result by init_val.
+
+   Option2:
+     add:        [0,0,...,0,init_val]
+     mult:       [1,1,...,1,init_val]
+     min/max:    [init_val,init_val,...,init_val]
+     bit and/or: [init_val,init_val,...,init_val]
+   and no adjustment is needed.
+
+   Of the operations listed above, the code below only handles the
+   add-like codes (WIDEN_SUM_EXPR, DOT_PROD_EXPR, PLUS_EXPR) and
+   MIN_EXPR/MAX_EXPR; any other code reaches gcc_unreachable.
+
+   Example:
+
+     s = init_val;
+     for (i=0;i<n;i++)
+       s = s + a[i];
+
+   Here STMT is 's = s + a[i]' and the reduction variable is 's'.  With
+   4 units per vector the result is either [0,0,0,init_val], or [0,0,0,0]
+   together with a request that the caller add 'init_val' at the end.
+
+   FORNOW: the "ADJUST_IN_EPILOG" scheme is the one compiled in.
+   TODO: Use some cost-model to estimate which scheme is more profitable.
+*/
+
+static tree
+get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
+{
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
+  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
+  tree scalar_type = TREE_TYPE (init_val);
+  tree fill;                    /* Value replicated across the vector.  */
+  tree elts = NULL_TREE;        /* TREE_LIST of the vector elements.  */
+  tree init_vec;
+  int n_fill;                   /* Number of elements set to FILL.  */
+  bool adjust_in_epilog;
+  int k;
+
+  gcc_assert (INTEGRAL_TYPE_P (scalar_type)
+              || SCALAR_FLOAT_TYPE_P (scalar_type));
+
+  switch (code)
+    {
+    case WIDEN_SUM_EXPR:
+    case DOT_PROD_EXPR:
+    case PLUS_EXPR:
+      /* Additive reductions are seeded with zero of the scalar type.  */
+      fill = (INTEGRAL_TYPE_P (scalar_type)
+              ? build_int_cst (scalar_type, 0)
+              : build_real (scalar_type, dconst0));
+
+#ifdef ADJUST_IN_EPILOG
+      /* Every one of the 'nunits' elements is zero; 'init_val' is added
+         to the final result at the loop epilog.  */
+      n_fill = nunits;
+      adjust_in_epilog = true;
+#else
+      /* Only 'nunits - 1' elements are zero; the remaining element is
+         'init_val', so the epilog has nothing left to adjust.  */
+      n_fill = nunits - 1;
+      adjust_in_epilog = false;
+#endif
+      break;
+
+    case MIN_EXPR:
+    case MAX_EXPR:
+      /* Replicate 'init_val' in every element; no adjustment needed.  */
+      fill = init_val;
+      n_fill = nunits;
+      adjust_in_epilog = false;
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Chain N_FILL copies of FILL into the element list.  */
+  for (k = 0; k < n_fill; k++)
+    elts = tree_cons (NULL_TREE, fill, elts);
+
+  if (n_fill == nunits - 1)
+    {
+      /* One slot is still free: give it to 'init_val'.
+         NOTE(review): tree_cons is passed the existing list as its chain,
+         so 'init_val' presumably ends up at the head of the list rather
+         than the tail - confirm the position is irrelevant.  */
+      elts = tree_cons (NULL_TREE, init_val, elts);
+      n_fill++;
+    }
+  gcc_assert (n_fill == nunits);
+
+  /* A constant 'init_val' selects build_vector; anything else goes
+     through build_constructor_from_list.  */
+  switch (TREE_CODE (init_val))
+    {
+    case INTEGER_CST:
+    case REAL_CST:
+      init_vec = build_vector (vectype, elts);
+      break;
+
+    default:
+      init_vec = build_constructor_from_list (vectype, elts);
+      break;
+    }
+
+  /* Tell the caller whether it must still account for 'init_val'.  */
+  *scalar_def = adjust_in_epilog ? init_val : NULL_TREE;
+
+  return vect_init_vector (stmt, init_vec);
+}
+
+
+/* Function vect_create_epilog_for_reduction
+
+ Create code at the loop-epilog to finalize the result of a reduction
+ computation.
+
+ VECT_DEF is a vector of partial results.
+ REDUC_CODE is the tree-code for the epilog reduction.
+ STMT is the scalar reduction stmt that is being vectorized.
+ REDUCTION_PHI is the phi-node that carries the reduction computation.
+
+ This function:
+ 1. Creates the reduction def-use cycle: sets the arguments for
+ REDUCTION_PHI:
+ The loop-entry argument is the vectorized initial-value of the reduction.
+ The loop-latch argument is VECT_DEF - the vector of partial sums.
+ 2. "Reduces" the vector of partial results VECT_DEF into a single result,
+ by applying the operation specified by REDUC_CODE if available, or by
+ other means (whole-vector shifts or a scalar loop).
+ The function also creates a new phi node at the loop exit to preserve
+ loop-closed form, as illustrated below.
+
+ The flow at the entry to this function:
+
+ loop:
+ vec_def = phi <null, null> # REDUCTION_PHI
+ VECT_DEF = vector_stmt # vectorized form of STMT
+ s_loop = scalar_stmt # (scalar) STMT
+ loop_exit:
+ s_out0 = phi <s_loop> # (scalar) EXIT_PHI
+ use <s_out0>
+ use <s_out0>
+
+ The above is transformed by this function into:
+
+ loop:
+ vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
+ VECT_DEF = vector_stmt # vectorized form of STMT
+ s_loop = scalar_stmt # (scalar) STMT
+ loop_exit:
+ s_out0 = phi <s_loop> # (scalar) EXIT_PHI
+ v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
+ v_out2 = reduce <v_out1>
+ s_out3 = extract_field <v_out2, 0>
+ s_out4 = adjust_result <s_out3>
+ use <s_out4>
+ use <s_out4>
+*/