/* Data references and dependences detectors.
- Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
Contributed by Sebastian Pop <pop@cri.ensmp.fr>
This file is part of GCC.
/* This pass walks a given loop structure searching for array
references. The information about the array accesses is recorded
- in DATA_REFERENCE structures.
-
- The basic test for determining the dependences is:
- given two access functions chrec1 and chrec2 to a same array, and
- x and y two vectors from the iteration domain, the same element of
+ in DATA_REFERENCE structures.
+
+ The basic test for determining the dependences is:
+ given two access functions chrec1 and chrec2 to a same array, and
+ x and y two vectors from the iteration domain, the same element of
the array is accessed twice at iterations x and y if and only if:
| chrec1 (x) == chrec2 (y).
-
+
The goals of this analysis are:
-
+
- to determine the independence: the relation between two
independent accesses is qualified with the chrec_known (this
information allows a loop parallelization),
-
+
- when two data references access the same data, to qualify the
dependence relation with classic dependence representations:
-
+
- distance vectors
- direction vectors
- loop carried level dependence
- polyhedron dependence
or with the chains of recurrences based representation,
-
- - to define a knowledge base for storing the data dependence
+
+ - to define a knowledge base for storing the data dependence
information,
-
+
- to define an interface to access this data.
-
-
+
+
Definitions:
-
+
- subscript: given two array accesses a subscript is the tuple
composed of the access functions for a given dimension. Example:
Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
(f1, g1), (f2, g2), (f3, g3).
- Diophantine equation: an equation whose coefficients and
- solutions are integer constants, for example the equation
+ solutions are integer constants, for example the equation
| 3*x + 2*y = 1
has an integer solution x = 1 and y = -1.
-
+
References:
-
+
- "Advanced Compilation for High Performance Computing" by Randy
Allen and Ken Kennedy.
- http://citeseer.ist.psu.edu/goff91practical.html
-
- - "Loop Transformations for Restructuring Compilers - The Foundations"
+ http://citeseer.ist.psu.edu/goff91practical.html
+
+ - "Loop Transformations for Restructuring Compilers - The Foundations"
by Utpal Banerjee.
-
+
*/
#include "config.h"
#include "system.h"
#include "coretypes.h"
-#include "tm.h"
-#include "ggc.h"
-#include "tree.h"
-
-/* These RTL headers are needed for basic-block.h. */
-#include "rtl.h"
-#include "basic-block.h"
-#include "diagnostic.h"
+#include "gimple-pretty-print.h"
#include "tree-flow.h"
-#include "tree-dump.h"
-#include "timevar.h"
#include "cfgloop.h"
-#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "langhooks.h"
+#include "tree-affine.h"
static struct datadep_stats
{
struct loop *);
/* Returns true iff A divides B. */
-static inline bool
-tree_fold_divides_p (tree a, tree b)
+static inline bool
+tree_fold_divides_p (const_tree a, const_tree b)
{
gcc_assert (TREE_CODE (a) == INTEGER_CST);
gcc_assert (TREE_CODE (b) == INTEGER_CST);
- return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
+ return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}
/* Returns true iff A divides B. */
-static inline bool
+static inline bool
int_divides_p (int a, int b)
{
return ((b % a) == 0);
\f
-/* Dump into FILE all the data references from DATAREFS. */
+/* Dump into FILE all the data references from DATAREFS. */
-void
+void
dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
{
unsigned int i;
struct data_reference *dr;
- for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
+ FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
dump_data_reference (file, dr);
}
-/* Dump into FILE all the dependence relations from DDRS. */
+/* Dump into STDERR all the data references from DATAREFS. This is a
+ thin wrapper that forwards DATAREFS to dump_data_references with
+ stderr as the output file. */
+
+DEBUG_FUNCTION void
+debug_data_references (VEC (data_reference_p, heap) *datarefs)
+{
+ dump_data_references (stderr, datarefs);
+}
+
+/* Dump to STDERR all the dependence relations from DDRS. This is a
+ thin wrapper that forwards DDRS to dump_data_dependence_relations
+ with stderr as the output file. */
+
+DEBUG_FUNCTION void
+debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
+{
+ dump_data_dependence_relations (stderr, ddrs);
+}
+
+/* Dump into FILE all the dependence relations from DDRS. */
-void
-dump_data_dependence_relations (FILE *file,
+void
+dump_data_dependence_relations (FILE *file,
VEC (ddr_p, heap) *ddrs)
{
unsigned int i;
struct data_dependence_relation *ddr;
- for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
+ FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
dump_data_dependence_relation (file, ddr);
}
+/* Print to STDERR the data_reference DR. This is a thin wrapper that
+ forwards DR to dump_data_reference with stderr as the output file. */
+
+DEBUG_FUNCTION void
+debug_data_reference (struct data_reference *dr)
+{
+ dump_data_reference (stderr, dr);
+}
+
/* Dump function for a DATA_REFERENCE structure. */
-void
-dump_data_reference (FILE *outf,
+void
+dump_data_reference (FILE *outf,
struct data_reference *dr)
{
unsigned int i;
-
- fprintf (outf, "(Data Ref: \n stmt: ");
- print_generic_stmt (outf, DR_STMT (dr), 0);
- fprintf (outf, " ref: ");
+
+ fprintf (outf, "#(Data Ref: \n");
+ fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
+ fprintf (outf, "# stmt: ");
+ print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
+ fprintf (outf, "# ref: ");
print_generic_stmt (outf, DR_REF (dr), 0);
- fprintf (outf, " base_object: ");
+ fprintf (outf, "# base_object: ");
print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
-
+
for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
{
- fprintf (outf, " Access function %d: ", i);
+ fprintf (outf, "# Access function %d: ", i);
print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
}
- fprintf (outf, ")\n");
+ fprintf (outf, "#)\n");
}
/* Dumps the affine function described by FN to the file OUTF. */
/* Dump function for a SUBSCRIPT structure. */
-void
+void
dump_subscript (FILE *outf, struct subscript *subscript)
{
conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
fprintf (outf, " last_conflict: ");
print_generic_stmt (outf, last_iteration, 0);
}
-
+
cf = SUB_CONFLICTS_IN_B (subscript);
fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
dump_conflict_function (outf, cf);
for (eq = 0; eq < length; eq++)
{
- enum data_dependence_direction dir = dirv[eq];
+ enum data_dependence_direction dir = ((enum data_dependence_direction)
+ dirv[eq]);
switch (dir)
{
unsigned j;
lambda_vector v;
- for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
+ FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
print_direction_vector (outf, v, length);
}
+/* Print out the first N elements of the vector VECTOR to OUTFILE.
+ Each element is printed as a decimal of minimum width 3 followed by
+ a space; a single newline is written after the last element (and is
+ the only output when N is not positive). */
+
+static inline void
+print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ fprintf (outfile, "%3d ", vector[i]);
+ fprintf (outfile, "\n"); /* Terminate the row. */
+}
+
/* Print a vector of distance vectors. */
void
unsigned j;
lambda_vector v;
- for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
+ FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
print_lambda_vector (outf, v, length);
}
/* Debug version. */
-void
+DEBUG_FUNCTION void
debug_data_dependence_relation (struct data_dependence_relation *ddr)
{
dump_data_dependence_relation (stderr, ddr);
/* Dump function for a DATA_DEPENDENCE_RELATION structure. */
-void
-dump_data_dependence_relation (FILE *outf,
+void
+dump_data_dependence_relation (FILE *outf,
struct data_dependence_relation *ddr)
{
struct data_reference *dra, *drb;
+ fprintf (outf, "(Data Dep: \n");
+
+ if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
+ {
+ if (ddr)
+ {
+ dra = DDR_A (ddr);
+ drb = DDR_B (ddr);
+ if (dra)
+ dump_data_reference (outf, dra);
+ else
+ fprintf (outf, " (nil)\n");
+ if (drb)
+ dump_data_reference (outf, drb);
+ else
+ fprintf (outf, " (nil)\n");
+ }
+ fprintf (outf, " (don't know)\n)\n");
+ return;
+ }
+
dra = DDR_A (ddr);
drb = DDR_B (ddr);
- fprintf (outf, "(Data Dep: \n");
- if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
- fprintf (outf, " (don't know)\n");
-
- else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
+ dump_data_reference (outf, dra);
+ dump_data_reference (outf, drb);
+
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
fprintf (outf, " (no dependence)\n");
-
+
else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
{
unsigned int i;
fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
fprintf (outf, " loop nest: (");
- for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
+ FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
fprintf (outf, "%d ", loopi->num);
fprintf (outf, ")\n");
/* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
void
-dump_data_dependence_direction (FILE *file,
+dump_data_dependence_direction (FILE *file,
enum data_dependence_direction dir)
{
switch (dir)
{
- case dir_positive:
+ case dir_positive:
fprintf (file, "+");
break;
-
+
case dir_negative:
fprintf (file, "-");
break;
-
+
case dir_equal:
fprintf (file, "=");
break;
-
+
case dir_positive_or_negative:
fprintf (file, "+-");
break;
-
- case dir_positive_or_equal:
+
+ case dir_positive_or_equal:
fprintf (file, "+=");
break;
-
- case dir_negative_or_equal:
+
+ case dir_negative_or_equal:
fprintf (file, "-=");
break;
-
- case dir_star:
- fprintf (file, "*");
+
+ case dir_star:
+ fprintf (file, "*");
break;
-
- default:
+
+ default:
break;
}
}
dependence vectors, or in other words the number of loops in the
considered nest. */
-void
+void
dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
{
unsigned int i, j;
struct data_dependence_relation *ddr;
lambda_vector v;
- for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
+ FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
{
- for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
+ FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
{
fprintf (file, "DISTANCE_V (");
print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
fprintf (file, ")\n");
}
- for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
+ FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
{
fprintf (file, "DIRECTION_V (");
print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
/* Dumps the data dependence relations DDRS in FILE. */
-void
+void
dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
{
unsigned int i;
struct data_dependence_relation *ddr;
- for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
+ FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
dump_data_dependence_relation (file, ddr);
fprintf (file, "\n\n");
}
-/* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
- will be ssizetype. */
+/* Helper function for split_constant_offset. Expresses OP0 CODE OP1
+ (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
+ constant of type ssizetype, and returns true. If we cannot do this
+ with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
+ is returned. */
-static void
-split_constant_offset (tree exp, tree *var, tree *off)
+static bool
+split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
+ tree *var, tree *off)
{
- tree type = TREE_TYPE (exp), otype;
tree var0, var1;
tree off0, off1;
- enum tree_code code;
+ enum tree_code ocode = code;
- *var = exp;
- STRIP_NOPS (exp);
- otype = TREE_TYPE (exp);
- code = TREE_CODE (exp);
+ *var = NULL_TREE;
+ *off = NULL_TREE;
switch (code)
{
case INTEGER_CST:
*var = build_int_cst (type, 0);
- *off = fold_convert (ssizetype, exp);
- return;
+ *off = fold_convert (ssizetype, op0);
+ return true;
case POINTER_PLUS_EXPR:
- code = PLUS_EXPR;
+ ocode = PLUS_EXPR;
/* FALLTHROUGH */
case PLUS_EXPR:
case MINUS_EXPR:
- split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
- split_constant_offset (TREE_OPERAND (exp, 1), &var1, &off1);
- *var = fold_convert (type, fold_build2 (TREE_CODE (exp), otype,
- var0, var1));
- *off = size_binop (code, off0, off1);
- return;
+ split_constant_offset (op0, &var0, &off0);
+ split_constant_offset (op1, &var1, &off1);
+ *var = fold_build2 (code, type, var0, var1);
+ *off = size_binop (ocode, off0, off1);
+ return true;
case MULT_EXPR:
- off1 = TREE_OPERAND (exp, 1);
- if (TREE_CODE (off1) != INTEGER_CST)
- break;
+ if (TREE_CODE (op1) != INTEGER_CST)
+ return false;
- split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
- *var = fold_convert (type, fold_build2 (MULT_EXPR, otype,
- var0, off1));
- *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, off1));
- return;
+ split_constant_offset (op0, &var0, &off0);
+ *var = fold_build2 (MULT_EXPR, type, var0, op1);
+ *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
+ return true;
case ADDR_EXPR:
{
- tree op, base, poffset;
+ tree base, poffset;
HOST_WIDE_INT pbitsize, pbitpos;
enum machine_mode pmode;
int punsignedp, pvolatilep;
- op = TREE_OPERAND (exp, 0);
- if (!handled_component_p (op))
- break;
-
- base = get_inner_reference (op, &pbitsize, &pbitpos, &poffset,
+ op0 = TREE_OPERAND (op0, 0);
+ base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
&pmode, &punsignedp, &pvolatilep, false);
if (pbitpos % BITS_PER_UNIT != 0)
- break;
+ return false;
base = build_fold_addr_expr (base);
off0 = ssize_int (pbitpos / BITS_PER_UNIT);
{
split_constant_offset (poffset, &poffset, &off1);
off0 = size_binop (PLUS_EXPR, off0, off1);
- base = fold_build2 (PLUS_EXPR, TREE_TYPE (base),
- base,
- fold_convert (TREE_TYPE (base), poffset));
+ if (POINTER_TYPE_P (TREE_TYPE (base)))
+ base = fold_build_pointer_plus (base, poffset);
+ else
+ base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
+ fold_convert (TREE_TYPE (base), poffset));
}
- *var = fold_convert (type, base);
+ var0 = fold_convert (type, base);
+
+ /* If variable length types are involved, punt, otherwise casts
+ might be converted into ARRAY_REFs in gimplify_conversion.
+ To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
+ possibly no longer appears in current GIMPLE, might resurface.
+ This perhaps could run
+ if (CONVERT_EXPR_P (var0))
+ {
+ gimplify_conversion (&var0);
+ // Attempt to fill in any within var0 found ARRAY_REF's
+ // element size from corresponding op embedded ARRAY_REF,
+ // if unsuccessful, just punt.
+ } */
+ while (POINTER_TYPE_P (type))
+ type = TREE_TYPE (type);
+ if (int_size_in_bytes (type) < 0)
+ return false;
+
+ *var = var0;
*off = off0;
- return;
+ return true;
}
case SSA_NAME:
{
- tree def_stmt = SSA_NAME_DEF_STMT (exp);
- if (TREE_CODE (def_stmt) == GIMPLE_MODIFY_STMT)
- {
- tree def_stmt_rhs = GIMPLE_STMT_OPERAND (def_stmt, 1);
+ gimple def_stmt = SSA_NAME_DEF_STMT (op0);
+ enum tree_code subcode;
- if (!TREE_SIDE_EFFECTS (def_stmt_rhs)
- && EXPR_P (def_stmt_rhs)
- && !REFERENCE_CLASS_P (def_stmt_rhs))
- {
- split_constant_offset (def_stmt_rhs, &var0, &off0);
- var0 = fold_convert (type, var0);
- *var = var0;
- *off = off0;
- return;
- }
+ if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
+ return false;
+
+ var0 = gimple_assign_rhs1 (def_stmt);
+ subcode = gimple_assign_rhs_code (def_stmt);
+ var1 = gimple_assign_rhs2 (def_stmt);
+
+ return split_constant_offset_1 (type, var0, subcode, var1, var, off);
+ }
+ CASE_CONVERT:
+ {
+ /* We must not introduce undefined overflow, and we must not change the value.
+ Hence we're okay if the inner type doesn't overflow to start with
+ (pointer or signed), the outer type also is an integer or pointer
+ and the outer precision is at least as large as the inner. */
+ tree itype = TREE_TYPE (op0);
+ if ((POINTER_TYPE_P (itype)
+ || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
+ && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
+ && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
+ {
+ split_constant_offset (op0, &var0, off);
+ *var = fold_convert (type, var0);
+ return true;
}
- break;
+ return false;
}
default:
- break;
+ return false;
}
+}
+
+/* Expresses EXP as VAR + OFF, where OFF is a constant. The type of OFF
+ will be ssizetype.
+
+ VAR is first set to EXP itself and OFF to zero; that fallback is the
+ result when EXP (after STRIP_NOPS) is a chrec, when its tree code is
+ classified as GIMPLE_TERNARY_RHS, or when split_constant_offset_1
+ returns false. Otherwise VAR is the variable part computed by
+ split_constant_offset_1, converted to the type EXP had on entry, and
+ OFF is the offset it computed. */
+
+void
+split_constant_offset (tree exp, tree *var, tree *off)
+{
+ tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
+ enum tree_code code;
+ *var = exp; /* Fallback result: EXP as passed in, before STRIP_NOPS. */
 *off = ssize_int (0);
+ STRIP_NOPS (exp);
+
+ if (tree_is_chrec (exp)
+ || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
+ return; /* Keep the fallback result set above. */
+
+ otype = TREE_TYPE (exp); /* Type of EXP after STRIP_NOPS. */
+ code = TREE_CODE (exp);
+ extract_ops_from_tree (exp, &code, &op0, &op1);
+ if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
+ {
+ *var = fold_convert (type, e); /* Back to the type EXP had on entry. */
+ *off = o;
+ }
 }
/* Returns the address ADDR of an object in a canonical shape (without nop
return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}
-/* Analyzes the behavior of the memory reference DR in the innermost loop that
- contains it. */
+/* Analyzes the behavior of the memory reference DR in the innermost loop or
+ basic block that contains it. Returns true if analysis succeeds or false
+ otherwise. */
-void
-dr_analyze_innermost (struct data_reference *dr)
+bool
+dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
{
- tree stmt = DR_STMT (dr);
+ gimple stmt = DR_STMT (dr);
struct loop *loop = loop_containing_stmt (stmt);
tree ref = DR_REF (dr);
HOST_WIDE_INT pbitsize, pbitpos;
int punsignedp, pvolatilep;
affine_iv base_iv, offset_iv;
tree init, dinit, step;
+ bool in_loop = (loop && loop->num);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "analyze_innermost: ");
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "failed: bit offset alignment.\n");
- return;
+ return false;
}
- base = build_fold_addr_expr (base);
- if (!simple_iv (loop, stmt, base, &base_iv, false))
+ if (TREE_CODE (base) == MEM_REF)
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of base is not affine.\n");
- return;
+ if (!integer_zerop (TREE_OPERAND (base, 1)))
+ {
+ if (!poffset)
+ {
+ double_int moff = mem_ref_offset (base);
+ poffset = double_int_to_tree (sizetype, moff);
+ }
+ else
+ poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
+ }
+ base = TREE_OPERAND (base, 0);
}
+ else
+ base = build_fold_addr_expr (base);
+
+ if (in_loop)
+ {
+ if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
+ false))
+ {
+ if (nest)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "failed: evolution of base is not"
+ " affine.\n");
+ return false;
+ }
+ else
+ {
+ base_iv.base = base;
+ base_iv.step = ssize_int (0);
+ base_iv.no_overflow = true;
+ }
+ }
+ }
+ else
+ {
+ base_iv.base = base;
+ base_iv.step = ssize_int (0);
+ base_iv.no_overflow = true;
+ }
+
if (!poffset)
{
offset_iv.base = ssize_int (0);
offset_iv.step = ssize_int (0);
}
- else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
+ else
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "failed: evolution of offset is not affine.\n");
- return;
+ if (!in_loop)
+ {
+ offset_iv.base = poffset;
+ offset_iv.step = ssize_int (0);
+ }
+ else if (!simple_iv (loop, loop_containing_stmt (stmt),
+ poffset, &offset_iv, false))
+ {
+ if (nest)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "failed: evolution of offset is not"
+ " affine.\n");
+ return false;
+ }
+ else
+ {
+ offset_iv.base = poffset;
+ offset_iv.step = ssize_int (0);
+ }
+ }
}
init = ssize_int (pbitpos / BITS_PER_UNIT);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "success.\n");
+
+ return true;
}
/* Determines the base object and the list of indices of memory reference
- DR, analyzed in loop nest NEST. */
+ DR, analyzed in LOOP and instantiated in loop nest NEST. */
static void
-dr_analyze_indices (struct data_reference *dr, struct loop *nest)
+dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
{
- tree stmt = DR_STMT (dr);
- struct loop *loop = loop_containing_stmt (stmt);
VEC (tree, heap) *access_fns = NULL;
- tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
+ tree ref, *aref, op;
tree base, off, access_fn;
+ basic_block before_loop;
+
+ /* If analyzing a basic-block there are no indices to analyze
+ and thus no access functions. */
+ if (!nest)
+ {
+ DR_BASE_OBJECT (dr) = DR_REF (dr);
+ DR_ACCESS_FNS (dr) = NULL;
+ return;
+ }
+
+ ref = unshare_expr (DR_REF (dr));
+ before_loop = block_before_loop (nest);
+
+ /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
+ into a two element array with a constant index. The base is
+ then just the immediate underlying object. */
+ if (TREE_CODE (ref) == REALPART_EXPR)
+ {
+ ref = TREE_OPERAND (ref, 0);
+ VEC_safe_push (tree, heap, access_fns, integer_zero_node);
+ }
+ else if (TREE_CODE (ref) == IMAGPART_EXPR)
+ {
+ ref = TREE_OPERAND (ref, 0);
+ VEC_safe_push (tree, heap, access_fns, integer_one_node);
+ }
- while (handled_component_p (aref))
+ /* Analyze access functions of dimensions we know to be independent. */
+ aref = &ref;
+ while (handled_component_p (*aref))
{
- if (TREE_CODE (aref) == ARRAY_REF)
+ if (TREE_CODE (*aref) == ARRAY_REF)
{
- op = TREE_OPERAND (aref, 1);
+ op = TREE_OPERAND (*aref, 1);
access_fn = analyze_scalar_evolution (loop, op);
- access_fn = resolve_mixers (nest, access_fn);
+ access_fn = instantiate_scev (before_loop, loop, access_fn);
VEC_safe_push (tree, heap, access_fns, access_fn);
-
- TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
+ /* For ARRAY_REFs the base is the reference with the index replaced
+ by zero if we cannot strip it as the outermost component. */
+ if (*aref == ref)
+ {
+ *aref = TREE_OPERAND (*aref, 0);
+ continue;
+ }
+ else
+ TREE_OPERAND (*aref, 1) = build_int_cst (TREE_TYPE (op), 0);
}
-
- aref = TREE_OPERAND (aref, 0);
+
+ aref = &TREE_OPERAND (*aref, 0);
}
- if (INDIRECT_REF_P (aref))
+ /* If the address operand of a MEM_REF base has an evolution in the
+ analyzed nest, add it as an additional independent access-function. */
+ if (TREE_CODE (*aref) == MEM_REF)
{
- op = TREE_OPERAND (aref, 0);
+ op = TREE_OPERAND (*aref, 0);
access_fn = analyze_scalar_evolution (loop, op);
- access_fn = resolve_mixers (nest, access_fn);
- base = initial_condition (access_fn);
- split_constant_offset (base, &base, &off);
- access_fn = chrec_replace_initial_condition (access_fn,
- fold_convert (TREE_TYPE (base), off));
-
- TREE_OPERAND (aref, 0) = base;
- VEC_safe_push (tree, heap, access_fns, access_fn);
+ access_fn = instantiate_scev (before_loop, loop, access_fn);
+ if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
+ {
+ tree orig_type;
+ base = initial_condition (access_fn);
+ orig_type = TREE_TYPE (base);
+ STRIP_USELESS_TYPE_CONVERSION (base);
+ split_constant_offset (base, &base, &off);
+ /* Fold the MEM_REF offset into the evolution's initial
+ value to make more bases comparable. */
+ if (!integer_zerop (TREE_OPERAND (*aref, 1)))
+ {
+ off = size_binop (PLUS_EXPR, off,
+ fold_convert (ssizetype,
+ TREE_OPERAND (*aref, 1)));
+ TREE_OPERAND (*aref, 1)
+ = build_int_cst (TREE_TYPE (TREE_OPERAND (*aref, 1)), 0);
+ }
+ access_fn = chrec_replace_initial_condition
+ (access_fn, fold_convert (orig_type, off));
+ *aref = fold_build2_loc (EXPR_LOCATION (*aref),
+ MEM_REF, TREE_TYPE (*aref),
+ base, TREE_OPERAND (*aref, 1));
+ VEC_safe_push (tree, heap, access_fns, access_fn);
+ }
}
DR_BASE_OBJECT (dr) = ref;
static void
dr_analyze_alias (struct data_reference *dr)
{
- tree stmt = DR_STMT (dr);
tree ref = DR_REF (dr);
- tree base = get_base_address (ref), addr, smt = NULL_TREE;
- ssa_op_iter it;
- tree op;
- bitmap vops;
+ tree base = get_base_address (ref), addr;
- if (DECL_P (base))
- smt = base;
- else if (INDIRECT_REF_P (base))
+ if (INDIRECT_REF_P (base)
+ || TREE_CODE (base) == MEM_REF)
{
addr = TREE_OPERAND (base, 0);
if (TREE_CODE (addr) == SSA_NAME)
- {
- smt = symbol_mem_tag (SSA_NAME_VAR (addr));
- DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
- }
- }
-
- DR_SYMBOL_TAG (dr) = smt;
- if (smt && var_can_have_subvars (smt))
- DR_SUBVARS (dr) = get_subvars_for_var (smt);
-
- vops = BITMAP_ALLOC (NULL);
- FOR_EACH_SSA_TREE_OPERAND (op, stmt, it, SSA_OP_VIRTUAL_USES)
- {
- bitmap_set_bit (vops, DECL_UID (SSA_NAME_VAR (op)));
+ DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
}
-
- DR_VOPS (dr) = vops;
-}
-
-/* Returns true if the address of DR is invariant. */
-
-static bool
-dr_address_invariant_p (struct data_reference *dr)
-{
- unsigned i;
- tree idx;
-
- for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
- if (tree_contains_chrecs (idx, NULL))
- return false;
-
- return true;
}
/* Frees data reference DR. */
-static void
+void
free_data_ref (data_reference_p dr)
{
- BITMAP_FREE (DR_VOPS (dr));
VEC_free (tree, heap, DR_ACCESS_FNS (dr));
free (dr);
}
/* Analyzes memory reference MEMREF accessed in STMT. The reference
is read if IS_READ is true, write otherwise. Returns the
- data_reference description of MEMREF. NEST is the outermost loop of the
- loop nest in that the reference should be analyzed. */
+ data_reference description of MEMREF. NEST is the outermost loop
+ in which the reference should be instantiated, LOOP is the loop in
+ which the data reference should be analyzed. */
struct data_reference *
-create_data_ref (struct loop *nest, tree memref, tree stmt, bool is_read)
+create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
+ bool is_read)
{
struct data_reference *dr;
DR_REF (dr) = memref;
DR_IS_READ (dr) = is_read;
- dr_analyze_innermost (dr);
- dr_analyze_indices (dr, nest);
+ dr_analyze_innermost (dr, nest);
+ dr_analyze_indices (dr, nest, loop);
dr_analyze_alias (dr);
if (dump_file && (dump_flags & TDF_DETAILS))
{
+ unsigned i;
fprintf (dump_file, "\tbase_address: ");
print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
fprintf (dump_file, "\n\toffset from base address: ");
print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
fprintf (dump_file, "\n\tbase_object: ");
print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
- fprintf (dump_file, "\n\tsymbol tag: ");
- print_generic_expr (dump_file, DR_SYMBOL_TAG (dr), TDF_SLIM);
fprintf (dump_file, "\n");
+ for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
+ {
+ fprintf (dump_file, "\tAccess function %d: ", i);
+ print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
+ }
}
- return dr;
+ return dr;
+}
+
+/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
+ expressions.
+
+ Both trees are first passed through STRIP_NOPS. They compare equal
+ when they are the same node, or when they have the same tree code,
+ are unary or binary expressions, and their operands compare equal
+ under this same test (operand 0 always, operand 1 only for binary
+ expressions). Any other pair of distinct nodes compares unequal. */
+static bool
+dr_equal_offsets_p1 (tree offset1, tree offset2)
+{
+ bool res;
+
+ STRIP_NOPS (offset1);
+ STRIP_NOPS (offset2);
+
+ if (offset1 == offset2) /* Same node. */
+ return true;
+
+ if (TREE_CODE (offset1) != TREE_CODE (offset2)
+ || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
+ return false; /* Different codes, or nothing to recurse into. */
+
+ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
+ TREE_OPERAND (offset2, 0));
+
+ if (!res || !BINARY_CLASS_P (offset1)) /* Mismatch, or unary: done. */
+ return res;
+
+ res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
+ TREE_OPERAND (offset2, 1));
+
+ return res;
+}
+
+/* Check if DRA and DRB have equal offsets. Returns the result of
+ comparing DR_OFFSET (DRA) with DR_OFFSET (DRB) using
+ dr_equal_offsets_p1. */
+bool
+dr_equal_offsets_p (struct data_reference *dra,
+ struct data_reference *drb)
+{
+ tree offset1, offset2;
+
+ offset1 = DR_OFFSET (dra);
+ offset2 = DR_OFFSET (drb);
+
+ return dr_equal_offsets_p1 (offset1, offset2);
 }
/* Returns true if FNA == FNB. */
&& affine_function_constant_p (fn));
}
+/* Returns a signed integer type with the largest precision from TA
+ and TB: the result of signed_type_for applied to TA when TA's
+ precision is strictly larger than TB's, and applied to TB otherwise
+ (including when the two precisions are equal). */
+
+static tree
+signed_type_for_types (tree ta, tree tb)
+{
+ if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
+ return signed_type_for (ta);
+ else
+ return signed_type_for (tb); /* Also taken when precisions are equal. */
+}
+
/* Applies operation OP on affine functions FNA and FNB, and returns the
result. */
ret = VEC_alloc (tree, heap, m);
for (i = 0; i < n; i++)
- VEC_quick_push (tree, ret,
- fold_build2 (op, integer_type_node,
- VEC_index (tree, fna, i),
- VEC_index (tree, fnb, i)));
+ {
+ tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
+ TREE_TYPE (VEC_index (tree, fnb, i)));
+
+ VEC_quick_push (tree, ret,
+ fold_build2 (op, type,
+ VEC_index (tree, fna, i),
+ VEC_index (tree, fnb, i)));
+ }
for (; VEC_iterate (tree, fna, i, coef); i++)
VEC_quick_push (tree, ret,
- fold_build2 (op, integer_type_node,
+ fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
coef, integer_zero_node));
for (; VEC_iterate (tree, fnb, i, coef); i++)
VEC_quick_push (tree, ret,
- fold_build2 (op, integer_type_node,
+ fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
integer_zero_node, coef));
return ret;
if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
{
unsigned int i;
-
+
for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
{
struct subscript *subscript;
-
+
subscript = DDR_SUBSCRIPT (ddr, i);
cf_a = SUB_CONFLICTS_IN_A (subscript);
cf_b = SUB_CONFLICTS_IN_B (subscript);
return;
}
diff = affine_fn_minus (fn_a, fn_b);
-
+
if (affine_function_constant_p (diff))
SUB_DISTANCE (subscript) = affine_function_base (diff);
else
/* Returns true if the address of OBJ is invariant in LOOP. */
static bool
-object_address_invariant_in_loop_p (struct loop *loop, tree obj)
+object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
{
while (handled_component_p (obj))
{
obj = TREE_OPERAND (obj, 0);
}
- if (!INDIRECT_REF_P (obj))
+ if (!INDIRECT_REF_P (obj)
+ && TREE_CODE (obj) != MEM_REF)
return true;
return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
loop->num);
}
-/* Returns true if A and B are accesses to different objects, or to different
- fields of the same object. */
+/* Returns false if we can prove that data references A and B do not alias,
+ true otherwise. If LOOP_NEST is false no cross-iteration aliases are
+ considered; in that case the full DR_REFs are first compared by offset
+ and size. Otherwise (or when that check is inconclusive) the answer
+ comes from refs_output_dependent_p when both A and B are writes, from
+ refs_anti_dependent_p when A is a read and B a write, and from
+ refs_may_alias_p in the remaining cases, each applied to the
+ DR_BASE_OBJECTs of A and B. */
-static bool
-disjoint_objects_p (tree a, tree b)
+bool
+dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
+ bool loop_nest)
{
- tree base_a, base_b;
- VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
- bool ret;
+ tree addr_a = DR_BASE_OBJECT (a);
+ tree addr_b = DR_BASE_OBJECT (b);
+
+ /* If we are not processing a loop nest but scalar code we
+ do not need to care about possible cross-iteration dependences
+ and thus can process the full original reference. Do so,
+ similar to how loop invariant motion applies extra offset-based
+ disambiguation. */
+ if (!loop_nest)
+ {
+ aff_tree off1, off2;
+ double_int size1, size2;
+ get_inner_reference_aff (DR_REF (a), &off1, &size1);
+ get_inner_reference_aff (DR_REF (b), &off2, &size2);
+ aff_combination_scale (&off1, double_int_minus_one); /* OFF1 is negated... */
+ aff_combination_add (&off2, &off1); /* ...and combined with OFF2; presumably
+ this leaves OFF2 - OFF1 in OFF2 -- confirm against tree-affine. */
+ if (aff_comb_cannot_overlap_p (&off2, size1, size2))
+ return false;
+ }
- base_a = get_base_address (a);
- base_b = get_base_address (b);
+ if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
+ return refs_output_dependent_p (addr_a, addr_b);
+ else if (DR_IS_READ (a) && DR_IS_WRITE (b))
+ return refs_anti_dependent_p (addr_a, addr_b);
+ return refs_may_alias_p (addr_a, addr_b);
+}
- if (DECL_P (base_a)
- && DECL_P (base_b)
- && base_a != base_b)
- return true;
+/* Initialize a data dependence relation between data accesses A and
+ B. NB_LOOPS is the number of loops surrounding the references: the
+ size of the classic distance/direction vectors. */
- if (!operand_equal_p (base_a, base_b, 0))
- return false;
+struct data_dependence_relation *
+initialize_data_dependence_relation (struct data_reference *a,
+ struct data_reference *b,
+ VEC (loop_p, heap) *loop_nest)
+{
+ struct data_dependence_relation *res;
+ unsigned int i;
- /* Compare the component references of A and B. We must start from the inner
- ones, so record them to the vector first. */
- while (handled_component_p (a))
- {
- VEC_safe_push (tree, heap, comp_a, a);
- a = TREE_OPERAND (a, 0);
- }
- while (handled_component_p (b))
+ res = XNEW (struct data_dependence_relation);
+ DDR_A (res) = a;
+ DDR_B (res) = b;
+ DDR_LOOP_NEST (res) = NULL;
+ DDR_REVERSED_P (res) = false;
+ DDR_SUBSCRIPTS (res) = NULL;
+ DDR_DIR_VECTS (res) = NULL;
+ DDR_DIST_VECTS (res) = NULL;
+
+ if (a == NULL || b == NULL)
{
- VEC_safe_push (tree, heap, comp_b, b);
- b = TREE_OPERAND (b, 0);
+ DDR_ARE_DEPENDENT (res) = chrec_dont_know;
+ return res;
}
- ret = false;
- while (1)
- {
- if (VEC_length (tree, comp_a) == 0
- || VEC_length (tree, comp_b) == 0)
- break;
-
- a = VEC_pop (tree, comp_a);
- b = VEC_pop (tree, comp_b);
-
- /* Real and imaginary part of a variable do not alias. */
- if ((TREE_CODE (a) == REALPART_EXPR
- && TREE_CODE (b) == IMAGPART_EXPR)
- || (TREE_CODE (a) == IMAGPART_EXPR
- && TREE_CODE (b) == REALPART_EXPR))
- {
- ret = true;
- break;
- }
-
- if (TREE_CODE (a) != TREE_CODE (b))
- break;
-
- /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
- DR_BASE_OBJECT are always zero. */
- if (TREE_CODE (a) == ARRAY_REF)
- continue;
- else if (TREE_CODE (a) == COMPONENT_REF)
- {
- if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
- continue;
-
- /* Different fields of unions may overlap. */
- base_a = TREE_OPERAND (a, 0);
- if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
- break;
-
- /* Different fields of structures cannot. */
- ret = true;
- break;
- }
- else
- break;
- }
-
- VEC_free (tree, heap, comp_a);
- VEC_free (tree, heap, comp_b);
-
- return ret;
-}
-
-/* Returns false if we can prove that data references A and B do not alias,
- true otherwise. */
-
-static bool
-dr_may_alias_p (struct data_reference *a, struct data_reference *b)
-{
- tree addr_a = DR_BASE_ADDRESS (a);
- tree addr_b = DR_BASE_ADDRESS (b);
- tree type_a, type_b;
- tree decl_a = NULL_TREE, decl_b = NULL_TREE;
-
- /* If the sets of virtual operands are disjoint, the memory references do not
- alias. */
- if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
- return false;
-
- /* If the accessed objects are disjoint, the memory references do not
- alias. */
- if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
- return false;
-
- if (!addr_a || !addr_b)
- return true;
-
- /* If the references are based on different static objects, they cannot alias
- (PTA should be able to disambiguate such accesses, but often it fails to,
- since currently we cannot distinguish between pointer and offset in pointer
- arithmetics). */
- if (TREE_CODE (addr_a) == ADDR_EXPR
- && TREE_CODE (addr_b) == ADDR_EXPR)
- return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
-
- /* An instruction writing through a restricted pointer is "independent" of any
- instruction reading or writing through a different restricted pointer,
- in the same block/scope. */
-
- type_a = TREE_TYPE (addr_a);
- type_b = TREE_TYPE (addr_b);
- gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
-
- if (TREE_CODE (addr_a) == SSA_NAME)
- decl_a = SSA_NAME_VAR (addr_a);
- if (TREE_CODE (addr_b) == SSA_NAME)
- decl_b = SSA_NAME_VAR (addr_b);
-
- if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
- && (!DR_IS_READ (a) || !DR_IS_READ (b))
- && decl_a && DECL_P (decl_a)
- && decl_b && DECL_P (decl_b)
- && decl_a != decl_b
- && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
- && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
- return false;
-
- return true;
-}
-
-/* Initialize a data dependence relation between data accesses A and
- B. NB_LOOPS is the number of loops surrounding the references: the
- size of the classic distance/direction vectors. */
-
-static struct data_dependence_relation *
-initialize_data_dependence_relation (struct data_reference *a,
- struct data_reference *b,
- VEC (loop_p, heap) *loop_nest)
-{
- struct data_dependence_relation *res;
- unsigned int i;
-
- res = XNEW (struct data_dependence_relation);
- DDR_A (res) = a;
- DDR_B (res) = b;
- DDR_LOOP_NEST (res) = NULL;
- DDR_REVERSED_P (res) = false;
-
- if (a == NULL || b == NULL)
+ /* If the data references do not alias, then they are independent. */
+ if (!dr_may_alias_p (a, b, loop_nest != NULL))
{
- DDR_ARE_DEPENDENT (res) = chrec_dont_know;
+ DDR_ARE_DEPENDENT (res) = chrec_known;
return res;
- }
+ }
- /* If the data references do not alias, then they are independent. */
- if (!dr_may_alias_p (a, b))
+ /* When the references are exactly the same, don't spend time doing
+ the data dependence tests, just initialize the ddr and return. */
+ if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
{
- DDR_ARE_DEPENDENT (res) = chrec_known;
+ DDR_AFFINE_P (res) = true;
+ DDR_ARE_DEPENDENT (res) = NULL_TREE;
+ DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
+ DDR_LOOP_NEST (res) = loop_nest;
+ DDR_INNER_LOOP (res) = 0;
+ DDR_SELF_REFERENCE (res) = true;
+ compute_self_dependence (res);
return res;
}
whether they alias or not. */
if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
{
- DDR_ARE_DEPENDENT (res) = chrec_dont_know;
+ DDR_ARE_DEPENDENT (res) = chrec_dont_know;
return res;
}
/* If the base of the object is not invariant in the loop nest, we cannot
analyze it. TODO -- in fact, it would suffice to record that there may
be arbitrary dependences in the loops where the base object varies. */
- if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
- DR_BASE_OBJECT (a)))
+ if (loop_nest
+ && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
+ DR_BASE_OBJECT (a)))
{
- DDR_ARE_DEPENDENT (res) = chrec_dont_know;
+ DDR_ARE_DEPENDENT (res) = chrec_dont_know;
return res;
}
- gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
+ /* If the numbers of dimensions of the accesses do not agree we can have
+ a pointer access to a component of the array element type and an
+ array access while the base-objects are still the same. Punt. */
+ if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
+ {
+ DDR_ARE_DEPENDENT (res) = chrec_dont_know;
+ return res;
+ }
DDR_AFFINE_P (res) = true;
DDR_ARE_DEPENDENT (res) = NULL_TREE;
DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
DDR_LOOP_NEST (res) = loop_nest;
DDR_INNER_LOOP (res) = 0;
- DDR_DIR_VECTS (res) = NULL;
- DDR_DIST_VECTS (res) = NULL;
+ DDR_SELF_REFERENCE (res) = false;
for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
{
struct subscript *subscript;
-
+
subscript = XNEW (struct subscript);
SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
unsigned i;
subscript_p s;
- for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
+ FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
{
free_conflict_function (s->conflicting_iterations_in_a);
free_conflict_function (s->conflicting_iterations_in_b);
+ free (s);
}
VEC_free (subscript_p, heap, subscripts);
}
description. */
static inline void
-finalize_ddr_dependent (struct data_dependence_relation *ddr,
+finalize_ddr_dependent (struct data_dependence_relation *ddr,
tree chrec)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, ")\n");
}
- DDR_ARE_DEPENDENT (ddr) = chrec;
+ DDR_ARE_DEPENDENT (ddr) = chrec;
free_subscripts (DDR_SUBSCRIPTS (ddr));
+ DDR_SUBSCRIPTS (ddr) = NULL;
}
/* The dependence relation DDR cannot be represented by a distance
variables, i.e., if the ZIV (Zero Index Variable) test is true. */
static inline bool
-ziv_subscript_p (tree chrec_a,
- tree chrec_b)
+ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
return (evolution_function_is_constant_p (chrec_a)
&& evolution_function_is_constant_p (chrec_b));
variable, i.e., if the SIV (Single Index Variable) test is true. */
static bool
-siv_subscript_p (tree chrec_a,
- tree chrec_b)
+siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
if ((evolution_function_is_constant_p (chrec_a)
&& evolution_function_is_univariate_p (chrec_b))
|| (evolution_function_is_constant_p (chrec_b)
&& evolution_function_is_univariate_p (chrec_a)))
return true;
-
+
if (evolution_function_is_univariate_p (chrec_a)
&& evolution_function_is_univariate_p (chrec_b))
{
case POLYNOMIAL_CHREC:
if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
return false;
-
+
default:
return true;
}
-
+
default:
return true;
}
}
-
+
return false;
}
gcc_assert (0 < n && n <= MAX_DIM);
va_start(ap, n);
-
+
ret->n = n;
for (i = 0; i < n; i++)
ret->fns[i] = va_arg (ap, affine_fn);
CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
-static void
-analyze_ziv_subscript (tree chrec_a,
- tree chrec_b,
+static void
+analyze_ziv_subscript (tree chrec_a,
+ tree chrec_b,
conflict_function **overlaps_a,
- conflict_function **overlaps_b,
+ conflict_function **overlaps_b,
tree *last_conflicts)
{
- tree difference;
+ tree type, difference;
dependence_stats.num_ziv++;
-
+
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "(analyze_ziv_subscript \n");
-
- chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
- chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
- difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
-
+
+ type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
+ chrec_a = chrec_convert (type, chrec_a, NULL);
+ chrec_b = chrec_convert (type, chrec_b, NULL);
+ difference = chrec_fold_minus (type, chrec_a, chrec_b);
+
switch (TREE_CODE (difference))
{
case INTEGER_CST:
dependence_stats.num_ziv_independent++;
}
break;
-
+
default:
- /* We're not sure whether the indexes overlap. For the moment,
+ /* We're not sure whether the indexes overlap. For the moment,
conservatively answer "don't know". */
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
dependence_stats.num_ziv_unimplemented++;
break;
}
-
+
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, ")\n");
}
-/* Sets NIT to the estimated number of executions of the statements in
- LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
- large as the number of iterations. If we have no reliable estimate,
- the function returns false, otherwise returns true. */
-
-bool
-estimated_loop_iterations (struct loop *loop, bool conservative,
- double_int *nit)
-{
- estimate_numbers_of_iterations_loop (loop);
- if (conservative)
- {
- if (!loop->any_upper_bound)
- return false;
-
- *nit = loop->nb_iterations_upper_bound;
- }
- else
- {
- if (!loop->any_estimate)
- return false;
-
- *nit = loop->nb_iterations_estimate;
- }
-
- return true;
-}
-
-/* Similar to estimated_loop_iterations, but returns the estimate only
- if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
- on the number of iterations of LOOP could not be derived, returns -1. */
-
-HOST_WIDE_INT
-estimated_loop_iterations_int (struct loop *loop, bool conservative)
-{
- double_int nit;
- HOST_WIDE_INT hwi_nit;
-
- if (!estimated_loop_iterations (loop, conservative, &nit))
- return -1;
-
- if (!double_int_fits_in_shwi_p (nit))
- return -1;
- hwi_nit = double_int_to_shwi (nit);
-
- return hwi_nit < 0 ? -1 : hwi_nit;
-}
-
-/* Similar to estimated_loop_iterations, but returns the estimate as a tree,
+/* Similar to max_stmt_executions_int, but returns the bound as a tree,
and only if it fits to the int type. If this is not the case, or the
- estimate on the number of iterations of LOOP could not be derived, returns
+ bound on the number of iterations of LOOP could not be derived, returns
chrec_dont_know. */
static tree
-estimated_loop_iterations_tree (struct loop *loop, bool conservative)
+max_stmt_executions_tree (struct loop *loop)
{
double_int nit;
- tree type;
- if (!estimated_loop_iterations (loop, conservative, &nit))
+ if (!max_stmt_executions (loop, true, &nit)) /* No bound could be obtained for LOOP. */
return chrec_dont_know;
- type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
- if (!double_int_fits_to_tree_p (type, nit))
+ if (!double_int_fits_to_tree_p (unsigned_type_node, nit)) /* The fit test is made against unsigned_type_node. */
return chrec_dont_know;
- return double_int_to_tree (type, nit);
+ return double_int_to_tree (unsigned_type_node, nit); /* The result has type unsigned_type_node. */
}
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
static void
-analyze_siv_subscript_cst_affine (tree chrec_a,
+analyze_siv_subscript_cst_affine (tree chrec_a,
tree chrec_b,
- conflict_function **overlaps_a,
- conflict_function **overlaps_b,
+ conflict_function **overlaps_a,
+ conflict_function **overlaps_b,
tree *last_conflicts)
{
bool value0, value1, value2;
- tree difference, tmp;
+ tree type, difference, tmp;
+
+ type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
+ chrec_a = chrec_convert (type, chrec_a, NULL);
+ chrec_b = chrec_convert (type, chrec_b, NULL);
+ difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
- chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
- chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
- difference = chrec_fold_minus
- (integer_type_node, initial_condition (chrec_b), chrec_a);
-
if (!chrec_is_positive (initial_condition (difference), &value0))
{
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "siv test failed: chrec is not positive.\n");
+ fprintf (dump_file, "siv test failed: chrec is not positive.\n");
dependence_stats.num_siv_unimplemented++;
*overlaps_a = conflict_fn_not_known ();
fprintf (dump_file, "siv test failed: chrec not positive.\n");
*overlaps_a = conflict_fn_not_known ();
- *overlaps_b = conflict_fn_not_known ();
+ *overlaps_b = conflict_fn_not_known ();
*last_conflicts = chrec_dont_know;
dependence_stats.num_siv_unimplemented++;
return;
{
if (value1 == true)
{
- /* Example:
+ /* Example:
chrec_a = 12
chrec_b = {10, +, 1}
*/
-
+
if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
{
HOST_WIDE_INT numiter;
struct loop *loop = get_chrec_loop (chrec_b);
*overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
- tmp = fold_build2 (EXACT_DIV_EXPR, integer_type_node,
- fold_build1 (ABS_EXPR,
- integer_type_node,
- difference),
+ tmp = fold_build2 (EXACT_DIV_EXPR, type,
+ fold_build1 (ABS_EXPR, type, difference),
CHREC_RIGHT (chrec_b));
*overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
*last_conflicts = integer_one_node;
-
+
/* Perform weak-zero siv test to see if overlap is
outside the loop bounds. */
- numiter = estimated_loop_iterations_int (loop, false);
+ numiter = max_stmt_executions_int (loop, true);
if (numiter >= 0
&& compare_tree_int (tmp, numiter) > 0)
*last_conflicts = integer_zero_node;
dependence_stats.num_siv_independent++;
return;
- }
+ }
dependence_stats.num_siv_dependent++;
return;
}
-
+
/* When the step does not divide the difference, there are
no overlaps. */
else
{
*overlaps_a = conflict_fn_no_dependence ();
- *overlaps_b = conflict_fn_no_dependence ();
+ *overlaps_b = conflict_fn_no_dependence ();
*last_conflicts = integer_zero_node;
dependence_stats.num_siv_independent++;
return;
}
}
-
+
else
{
- /* Example:
+ /* Example:
chrec_a = 12
chrec_b = {10, +, -1}
-
+
In this case, chrec_a will not overlap with chrec_b. */
*overlaps_a = conflict_fn_no_dependence ();
*overlaps_b = conflict_fn_no_dependence ();
}
}
}
- else
+ else
{
if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
{
fprintf (dump_file, "siv test failed: chrec not positive.\n");
*overlaps_a = conflict_fn_not_known ();
- *overlaps_b = conflict_fn_not_known ();
+ *overlaps_b = conflict_fn_not_known ();
*last_conflicts = chrec_dont_know;
dependence_stats.num_siv_unimplemented++;
return;
{
if (value2 == false)
{
- /* Example:
+ /* Example:
chrec_a = 3
chrec_b = {10, +, -1}
*/
struct loop *loop = get_chrec_loop (chrec_b);
*overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
- tmp = fold_build2 (EXACT_DIV_EXPR,
- integer_type_node, difference,
+ tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
CHREC_RIGHT (chrec_b));
*overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
*last_conflicts = integer_one_node;
/* Perform weak-zero siv test to see if overlap is
outside the loop bounds. */
- numiter = estimated_loop_iterations_int (loop, false);
+ numiter = max_stmt_executions_int (loop, true);
if (numiter >= 0
&& compare_tree_int (tmp, numiter) > 0)
*last_conflicts = integer_zero_node;
dependence_stats.num_siv_independent++;
return;
- }
+ }
dependence_stats.num_siv_dependent++;
return;
}
-
+
/* When the step does not divide the difference, there
are no overlaps. */
else
{
*overlaps_a = conflict_fn_no_dependence ();
- *overlaps_b = conflict_fn_no_dependence ();
+ *overlaps_b = conflict_fn_no_dependence ();
*last_conflicts = integer_zero_node;
dependence_stats.num_siv_independent++;
return;
}
else
{
- /* Example:
- chrec_a = 3
+ /* Example:
+ chrec_a = 3
chrec_b = {4, +, 1}
-
+
In this case, chrec_a will not overlap with chrec_b. */
*overlaps_a = conflict_fn_no_dependence ();
*overlaps_b = conflict_fn_no_dependence ();
/* Helper recursive function for initializing the matrix A. Returns
the initial value of CHREC. */
-static int
+static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
gcc_assert (chrec);
- if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
- return int_cst_value (chrec);
+ switch (TREE_CODE (chrec))
+ {
+ case POLYNOMIAL_CHREC:
+ gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
+
+ A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec)); /* Row INDEX gets MULT times the constant CHREC_RIGHT. */
+ return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult); /* Continue with CHREC_LEFT in row INDEX + 1. */
+
+ case PLUS_EXPR:
+ case MULT_EXPR:
+ case MINUS_EXPR:
+ {
+ tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
+ tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult); /* Both operands are processed with the same INDEX. */
+
+ return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1); /* Recombine the two results with the original operator. */
+ }
+
+ case NOP_EXPR:
+ {
+ tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
+ return chrec_convert (chrec_type (chrec), op, NULL); /* Convert the operand's result to the type of CHREC. */
+ }
+
+ case BIT_NOT_EXPR:
+ {
+ /* Handle ~X as -1 - X. */
+ tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
+ return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
+ build_int_cst (TREE_TYPE (chrec), -1), op);
+ }
+
+ case INTEGER_CST:
+ return chrec; /* A constant is returned unchanged and A is not touched. */
- A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
- return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
+ default:
+ gcc_unreachable (); /* No other tree code is handled here. */
+ return NULL_TREE;
+ }
}
#define FLOOR_DIV(x,y) ((x) / (y))
-/* Solves the special case of the Diophantine equation:
+/* Solves the special case of the Diophantine equation:
| {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
constructed as evolutions in dimension DIM. */
static void
-compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
+compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
affine_fn *overlaps_a,
- affine_fn *overlaps_b,
+ affine_fn *overlaps_b,
tree *last_conflicts, int dim)
{
if (((step_a > 0 && step_b > 0)
step_overlaps_a = step_b / gcd_steps_a_b;
step_overlaps_b = step_a / gcd_steps_a_b;
- tau2 = FLOOR_DIV (niter, step_overlaps_a);
- tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
- last_conflict = tau2;
+ if (niter > 0)
+ {
+ tau2 = FLOOR_DIV (niter, step_overlaps_a);
+ tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
+ last_conflict = tau2;
+ *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
+ }
+ else
+ *last_conflicts = chrec_dont_know;
- *overlaps_a = affine_fn_univar (integer_zero_node, dim,
+ *overlaps_a = affine_fn_univar (integer_zero_node, dim,
build_int_cst (NULL_TREE,
step_overlaps_a));
- *overlaps_b = affine_fn_univar (integer_zero_node, dim,
- build_int_cst (NULL_TREE,
+ *overlaps_b = affine_fn_univar (integer_zero_node, dim,
+ build_int_cst (NULL_TREE,
step_overlaps_b));
- *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
}
else
/* Solves the special case of a Diophantine equation where CHREC_A is
an affine bivariate function, and CHREC_B is an affine univariate
- function. For example,
+ function. For example,
| {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
-
- has the following overlapping functions:
+
+ has the following overlapping functions:
| x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
| y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
a common benchmark. Implement the general algorithm. */
static void
-compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
+compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
conflict_function **overlaps_a,
- conflict_function **overlaps_b,
+ conflict_function **overlaps_b,
tree *last_conflicts)
{
bool xz_p, yz_p, xyz_p;
step_y = int_cst_value (CHREC_RIGHT (chrec_a));
step_z = int_cst_value (CHREC_RIGHT (chrec_b));
- niter_x =
- estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
- false);
- niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
- niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
-
+ niter_x =
+ max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
+ niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
+ niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
+
if (niter_x < 0 || niter_y < 0 || niter_z < 0)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
-
+
*overlaps_a = conflict_fn_not_known ();
*overlaps_b = conflict_fn_not_known ();
*last_conflicts = chrec_dont_know;
affine_fn_free (overlaps_b_xyz);
}
+/* Copy the elements of vector VEC1 with length SIZE to VEC2. Note the
+ argument order: the source comes first, the destination second. The
+ copy is a single memcpy, so VEC1 and VEC2 must not overlap. */
+
+static void
+lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
+ int size)
+{
+ memcpy (vec2, vec1, size * sizeof (*vec1));
+}
+
+/* Copy the elements of M x N matrix MAT1 to MAT2, row by row with
+ lambda_vector_copy. Only the row contents are copied; the row
+ pointers stored in MAT2 are not modified. */
+
+static void
+lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
+ int m, int n)
+{
+ int i;
+
+ for (i = 0; i < m; i++)
+ lambda_vector_copy (mat1[i], mat2[i], n);
+}
+
+/* Store the SIZE x SIZE identity matrix in MAT: every element is
+ overwritten, with 1 on the diagonal and 0 everywhere else. */
+
+static void
+lambda_matrix_id (lambda_matrix mat, int size)
+{
+ int i, j;
+
+ for (i = 0; i < size; i++)
+ for (j = 0; j < size; j++)
+ mat[i][j] = (i == j) ? 1 : 0;
+}
+
+/* Return the index of the first nonzero element of vector VEC1 in the
+ index range [START, N). We must have START <= N. Returns N if all
+ the elements in that range are zero. */
+
+static int
+lambda_vector_first_nz (lambda_vector vec1, int n, int start)
+{
+ int j = start;
+ while (j < n && vec1[j] == 0)
+ j++;
+ return j;
+}
+
+/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
+ R2 = R2 + CONST1 * R1. The update is done in place, and nothing is
+ done at all when CONST1 is zero. */
+
+static void
+lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
+{
+ int i;
+
+ if (const1 == 0)
+ return;
+
+ for (i = 0; i < n; i++)
+ mat[r2][i] += const1 * mat[r1][i];
+}
+
+/* Swap rows R1 and R2 in matrix MAT. Only the two row pointers stored
+ in MAT are exchanged; no element is copied. */
+
+static void
+lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
+{
+ lambda_vector row;
+
+ row = mat[r1];
+ mat[r1] = mat[r2];
+ mat[r2] = row;
+}
+
+/* Multiply vector VEC1 of length SIZE by a constant CONST1,
+ and store the result in VEC2. When CONST1 is zero, VEC2 is cleared
+ with lambda_vector_clear and VEC1 is not read. VEC1 and VEC2 may be
+ the same vector, since each element is read before it is written. */
+
+static void
+lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
+ int size, int const1)
+{
+ int i;
+
+ if (const1 == 0)
+ lambda_vector_clear (vec2, size);
+ else
+ for (i = 0; i < size; i++)
+ vec2[i] = const1 * vec1[i];
+}
+
+/* Negate vector VEC1 with length SIZE and store it in VEC2. This is
+ lambda_vector_mult_const with the constant -1. */
+
+static void
+lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
+ int size)
+{
+ lambda_vector_mult_const (vec1, vec2, size, -1);
+}
+
+/* Negate row R1 of matrix MAT which has N columns. The row is negated
+ in place: it is passed to lambda_vector_negate as both source and
+ destination. */
+
+static void
+lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
+{
+ lambda_vector_negate (mat[r1], mat[r1], n);
+}
+
+/* Return true if the first SIZE elements of VEC1 and VEC2 are equal,
+ false as soon as one differing pair is found. The result is true
+ when SIZE is not positive, since no element is then compared. */
+
+static bool
+lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ if (vec1[i] != vec2[i])
+ return false;
+ return true;
+}
+
+/* Given an M x N integer matrix A, this function determines an M x
+ M unimodular matrix U, and an M x N echelon matrix S such that
+ "U.A = S". This decomposition is also known as "right Hermite".
+
+ A is only read. S and U are outputs supplied by the caller: S
+ starts as a copy of A and U as the M x M identity, and every row
+ operation applied to S below is applied to U as well.
+
+ Ref: Algorithm 2.1 page 33 in "Loop Transformations for
+ Restructuring Compilers" Utpal Banerjee. */
+
+static void
+lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
+ lambda_matrix S, lambda_matrix U)
+{
+ int i, j, i0 = 0;
+
+ lambda_matrix_copy (A, S, m, n);
+ lambda_matrix_id (U, m);
+
+ for (j = 0; j < n; j++)
+ {
+ if (lambda_vector_first_nz (S[j], m, i0) < m) /* NOTE(review): this
+ scans S[j][i0 .. m-1], i.e. row J indexed up to M, whereas the
+ loops below use J as a column index (S[i][j]) -- confirm that
+ this matches the intended "column J has a nonzero" test. */
+ {
+ ++i0;
+ for (i = m - 1; i >= i0; i--)
+ {
+ while (S[i][j] != 0)
+ {
+ int sigma, factor, a, b;
+
+ a = S[i-1][j];
+ b = S[i][j];
+ sigma = (a * b < 0) ? -1: 1;
+ a = abs (a);
+ b = abs (b);
+ factor = sigma * (a / b); /* B is nonzero here: it comes from
+ S[i][j], which the loop condition just tested. */
+
+ lambda_matrix_row_add (S, n, i, i-1, -factor); /* Row I-1 -= FACTOR * row I ... */
+ lambda_matrix_row_exchange (S, i, i-1); /* ... then the two rows are swapped. */
+
+ lambda_matrix_row_add (U, m, i, i-1, -factor); /* Same two steps on U. */
+ lambda_matrix_row_exchange (U, i, i-1);
+ }
+ }
+ }
+ }
+}
+
/* Determines the overlapping elements due to accesses CHREC_A and
CHREC_B, that are affine functions. This function cannot handle
symbolic evolution functions, ie. when initial conditions are
parameters, because it uses lambda matrices of integers. */
static void
-analyze_subscript_affine_affine (tree chrec_a,
+analyze_subscript_affine_affine (tree chrec_a,
tree chrec_b,
- conflict_function **overlaps_a,
- conflict_function **overlaps_b,
+ conflict_function **overlaps_a,
+ conflict_function **overlaps_b,
tree *last_conflicts)
{
unsigned nb_vars_a, nb_vars_b, dim;
HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
- HOST_WIDE_INT tau1, tau2;
lambda_matrix A, U, S;
+ struct obstack scratch_obstack;
if (eq_evolutions_p (chrec_a, chrec_b))
{
}
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "(analyze_subscript_affine_affine \n");
-
+
/* For determining the initial intersection, we have to solve a
Diophantine equation. This is the most time consuming part.
-
+
For answering to the question: "Is there a dependence?" we have
to prove that there exists a solution to the Diophantine
equation, and that the solution is in the iteration domain,
nb_vars_a = nb_vars_in_chrec (chrec_a);
nb_vars_b = nb_vars_in_chrec (chrec_b);
+ gcc_obstack_init (&scratch_obstack);
+
dim = nb_vars_a + nb_vars_b;
- U = lambda_matrix_new (dim, dim);
- A = lambda_matrix_new (dim, 1);
- S = lambda_matrix_new (dim, 1);
+ U = lambda_matrix_new (dim, dim, &scratch_obstack);
+ A = lambda_matrix_new (dim, 1, &scratch_obstack);
+ S = lambda_matrix_new (dim, 1, &scratch_obstack);
- init_a = initialize_matrix_A (A, chrec_a, 0, 1);
- init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
+ init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
+ init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
gamma = init_b - init_a;
/* Don't do all the hard work of solving the Diophantine equation
- when we already know the solution: for example,
+ when we already know the solution: for example,
| {3, +, 1}_1
| {3, +, 4}_2
| gamma = 3 - 3 = 0.
- Then the first overlap occurs during the first iterations:
+ Then the first overlap occurs during the first iterations:
| {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
*/
if (gamma == 0)
HOST_WIDE_INT niter, niter_a, niter_b;
affine_fn ova, ovb;
- niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
- false);
- niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
- false);
- if (niter_a < 0 || niter_b < 0)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
- *overlaps_a = conflict_fn_not_known ();
- *overlaps_b = conflict_fn_not_known ();
- *last_conflicts = chrec_dont_know;
- goto end_analyze_subs_aa;
- }
-
+ niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
+ niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
niter = MIN (niter_a, niter_b);
-
step_a = int_cst_value (CHREC_RIGHT (chrec_a));
step_b = int_cst_value (CHREC_RIGHT (chrec_b));
- compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
- &ova, &ovb,
+ compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
+ &ova, &ovb,
last_conflicts, 1);
*overlaps_a = conflict_fn (1, ova);
*overlaps_b = conflict_fn (1, ovb);
|| (A[0][0] < 0 && -A[1][0] < 0)))
{
/* The solutions are given by:
- |
+ |
| [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
| [u21 u22] [y0]
-
+
For a given integer t. Using the following variables,
-
+
| i0 = u11 * gamma / gcd_alpha_beta
| j0 = u12 * gamma / gcd_alpha_beta
| i1 = u21
| j1 = u22
-
+
the solutions are:
-
- | x0 = i0 + i1 * t,
- | y0 = j0 + j1 * t. */
-
- HOST_WIDE_INT i0, j0, i1, j1;
- /* X0 and Y0 are the first iterations for which there is a
- dependence. X0, Y0 are two solutions of the Diophantine
- equation: chrec_a (X0) = chrec_b (Y0). */
- HOST_WIDE_INT x0, y0;
- HOST_WIDE_INT niter, niter_a, niter_b;
+ | x0 = i0 + i1 * t,
+ | y0 = j0 + j1 * t. */
+ HOST_WIDE_INT i0, j0, i1, j1;
- niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
- false);
- niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
- false);
+ i0 = U[0][0] * gamma / gcd_alpha_beta;
+ j0 = U[0][1] * gamma / gcd_alpha_beta;
+ i1 = U[1][0];
+ j1 = U[1][1];
- if (niter_a < 0 || niter_b < 0)
+ if ((i1 == 0 && i0 < 0)
+ || (j1 == 0 && j0 < 0))
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
- *overlaps_a = conflict_fn_not_known ();
- *overlaps_b = conflict_fn_not_known ();
- *last_conflicts = chrec_dont_know;
- goto end_analyze_subs_aa;
- }
-
- niter = MIN (niter_a, niter_b);
-
- i0 = U[0][0] * gamma / gcd_alpha_beta;
- j0 = U[0][1] * gamma / gcd_alpha_beta;
- i1 = U[1][0];
- j1 = U[1][1];
-
- if ((i1 == 0 && i0 < 0)
- || (j1 == 0 && j0 < 0))
- {
- /* There is no solution.
- FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
- falls in here, but for the moment we don't look at the
+ /* There is no solution.
+ FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
+ falls in here, but for the moment we don't look at the
upper bound of the iteration domain. */
*overlaps_a = conflict_fn_no_dependence ();
*overlaps_b = conflict_fn_no_dependence ();
*last_conflicts = integer_zero_node;
+ goto end_analyze_subs_aa;
}
- else
+ if (i1 > 0 && j1 > 0)
{
- if (i1 > 0)
+ HOST_WIDE_INT niter_a = max_stmt_executions_int
+ (get_chrec_loop (chrec_a), true);
+ HOST_WIDE_INT niter_b = max_stmt_executions_int
+ (get_chrec_loop (chrec_b), true);
+ HOST_WIDE_INT niter = MIN (niter_a, niter_b);
+
+ /* (X0, Y0) is a solution of the Diophantine equation:
+ "chrec_a (X0) = chrec_b (Y0)". */
+ HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
+ CEIL (-j0, j1));
+ HOST_WIDE_INT x0 = i1 * tau1 + i0;
+ HOST_WIDE_INT y0 = j1 * tau1 + j0;
+
+ /* (X1, Y1) is the smallest positive solution of the eq
+ "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
+ first conflict occurs. */
+ HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
+ HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
+ HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
+
+ if (niter > 0)
{
- tau1 = CEIL (-i0, i1);
- tau2 = FLOOR_DIV (niter - i0, i1);
+ HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
+ FLOOR_DIV (niter - j0, j1));
+ HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
- if (j1 > 0)
+ /* If the overlap occurs outside of the bounds of the
+ loop, there is no dependence. */
+ if (x1 >= niter || y1 >= niter)
{
- int last_conflict, min_multiple;
- tau1 = MAX (tau1, CEIL (-j0, j1));
- tau2 = MIN (tau2, FLOOR_DIV (niter - j0, j1));
-
- x0 = i1 * tau1 + i0;
- y0 = j1 * tau1 + j0;
-
- /* At this point (x0, y0) is one of the
- solutions to the Diophantine equation. The
- next step has to compute the smallest
- positive solution: the first conflicts. */
- min_multiple = MIN (x0 / i1, y0 / j1);
- x0 -= i1 * min_multiple;
- y0 -= j1 * min_multiple;
-
- tau1 = (x0 - i0)/i1;
- last_conflict = tau2 - tau1;
-
- /* If the overlap occurs outside of the bounds of the
- loop, there is no dependence. */
- if (x0 > niter || y0 > niter)
- {
- *overlaps_a = conflict_fn_no_dependence ();
- *overlaps_b = conflict_fn_no_dependence ();
- *last_conflicts = integer_zero_node;
- }
- else
- {
- *overlaps_a
- = conflict_fn (1,
- affine_fn_univar (build_int_cst (NULL_TREE, x0),
- 1,
- build_int_cst (NULL_TREE, i1)));
- *overlaps_b
- = conflict_fn (1,
- affine_fn_univar (build_int_cst (NULL_TREE, y0),
- 1,
- build_int_cst (NULL_TREE, j1)));
- *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
- }
+ *overlaps_a = conflict_fn_no_dependence ();
+ *overlaps_b = conflict_fn_no_dependence ();
+ *last_conflicts = integer_zero_node;
+ goto end_analyze_subs_aa;
}
else
- {
- /* FIXME: For the moment, the upper bound of the
- iteration domain for j is not checked. */
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
- *overlaps_a = conflict_fn_not_known ();
- *overlaps_b = conflict_fn_not_known ();
- *last_conflicts = chrec_dont_know;
- }
+ *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
}
-
else
- {
- /* FIXME: For the moment, the upper bound of the
- iteration domain for i is not checked. */
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
- *overlaps_a = conflict_fn_not_known ();
- *overlaps_b = conflict_fn_not_known ();
- *last_conflicts = chrec_dont_know;
- }
+ *last_conflicts = chrec_dont_know;
+
+ *overlaps_a
+ = conflict_fn (1,
+ affine_fn_univar (build_int_cst (NULL_TREE, x1),
+ 1,
+ build_int_cst (NULL_TREE, i1)));
+ *overlaps_b
+ = conflict_fn (1,
+ affine_fn_univar (build_int_cst (NULL_TREE, y1),
+ 1,
+ build_int_cst (NULL_TREE, j1)));
+ }
+ else
+ {
+ /* FIXME: For the moment, the upper bound of the
+ iteration domain for i and j is not checked. */
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
+ *overlaps_a = conflict_fn_not_known ();
+ *overlaps_b = conflict_fn_not_known ();
+ *last_conflicts = chrec_dont_know;
}
}
else
*last_conflicts = chrec_dont_know;
}
}
-
else
{
if (dump_file && (dump_flags & TDF_DETAILS))
*last_conflicts = chrec_dont_know;
}
-end_analyze_subs_aa:
+end_analyze_subs_aa:
+ obstack_free (&scratch_obstack, NULL);
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, " (overlaps_a = ");
determining the dependence relation between chrec_a and chrec_b,
that contain symbols. This function modifies chrec_a and chrec_b
such that the analysis result is the same, and such that they don't
- contain symbols, and then can safely be passed to the analyzer.
+ contain symbols, and then can safely be passed to the analyzer.
Example: The analysis of the following tuples of evolutions produce
the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
vs. {0, +, 1}_1
-
+
{x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
{-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
*/
type = chrec_type (*chrec_a);
left_a = CHREC_LEFT (*chrec_a);
- left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL_TREE);
+ left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
diff = chrec_fold_minus (type, left_a, left_b);
if (!evolution_function_is_constant_p (diff))
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
- *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
+ *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
diff, CHREC_RIGHT (*chrec_a));
- right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL_TREE);
+ right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
*chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
build_int_cst (type, 0),
right_b);
CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
static void
-analyze_siv_subscript (tree chrec_a,
+analyze_siv_subscript (tree chrec_a,
tree chrec_b,
- conflict_function **overlaps_a,
- conflict_function **overlaps_b,
- tree *last_conflicts)
+ conflict_function **overlaps_a,
+ conflict_function **overlaps_b,
+ tree *last_conflicts,
+ int loop_nest_num)
{
dependence_stats.num_siv++;
-
+
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "(analyze_siv_subscript \n");
-
+
if (evolution_function_is_constant_p (chrec_a)
- && evolution_function_is_affine_p (chrec_b))
- analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
+ && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
+ analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
overlaps_a, overlaps_b, last_conflicts);
-
- else if (evolution_function_is_affine_p (chrec_a)
+
+ else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
&& evolution_function_is_constant_p (chrec_b))
- analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
+ analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
overlaps_b, overlaps_a, last_conflicts);
-
- else if (evolution_function_is_affine_p (chrec_a)
- && evolution_function_is_affine_p (chrec_b))
+
+ else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
+ && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
{
if (!chrec_contains_symbols (chrec_a)
&& !chrec_contains_symbols (chrec_b))
{
- analyze_subscript_affine_affine (chrec_a, chrec_b,
- overlaps_a, overlaps_b,
+ analyze_subscript_affine_affine (chrec_a, chrec_b,
+ overlaps_a, overlaps_b,
last_conflicts);
if (CF_NOT_KNOWN_P (*overlaps_a)
else
dependence_stats.num_siv_dependent++;
}
- else if (can_use_analyze_subscript_affine_affine (&chrec_a,
+ else if (can_use_analyze_subscript_affine_affine (&chrec_a,
&chrec_b))
{
- analyze_subscript_affine_affine (chrec_a, chrec_b,
- overlaps_a, overlaps_b,
+ analyze_subscript_affine_affine (chrec_a, chrec_b,
+ overlaps_a, overlaps_b,
last_conflicts);
if (CF_NOT_KNOWN_P (*overlaps_a)
*last_conflicts = chrec_dont_know;
dependence_stats.num_siv_unimplemented++;
}
-
+
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, ")\n");
}
of CHREC does not divide CST, false otherwise. */
static bool
-gcd_of_steps_may_divide_p (tree chrec, tree cst)
+gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
{
HOST_WIDE_INT cd = 0, val;
tree step;
CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
static void
-analyze_miv_subscript (tree chrec_a,
- tree chrec_b,
- conflict_function **overlaps_a,
- conflict_function **overlaps_b,
+analyze_miv_subscript (tree chrec_a,
+ tree chrec_b,
+ conflict_function **overlaps_a,
+ conflict_function **overlaps_b,
tree *last_conflicts,
struct loop *loop_nest)
{
- /* FIXME: This is a MIV subscript, not yet handled.
- Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
- (A[i] vs. A[j]).
-
- In the SIV test we had to solve a Diophantine equation with two
- variables. In the MIV case we have to solve a Diophantine
- equation with 2*n variables (if the subscript uses n IVs).
- */
- tree difference;
+ tree type, difference;
+
dependence_stats.num_miv++;
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "(analyze_miv_subscript \n");
- chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
- chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
- difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
-
+ type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
+ chrec_a = chrec_convert (type, chrec_a, NULL);
+ chrec_b = chrec_convert (type, chrec_b, NULL);
+ difference = chrec_fold_minus (type, chrec_a, chrec_b);
+
if (eq_evolutions_p (chrec_a, chrec_b))
{
/* Access functions are the same: all the elements are accessed
in the same order. */
*overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
*overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
- *last_conflicts = estimated_loop_iterations_tree
- (get_chrec_loop (chrec_a), true);
+ *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
dependence_stats.num_miv_dependent++;
}
-
+
else if (evolution_function_is_constant_p (difference)
/* For the moment, the following is verified:
evolution_function_is_affine_multivariate_p (chrec_a,
&& !gcd_of_steps_may_divide_p (chrec_a, difference))
{
/* testsuite/.../ssa-chrec-33.c
- {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
-
+ {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
+
The difference is 1, and all the evolution steps are multiples
of 2, consequently there are no overlapping elements. */
*overlaps_a = conflict_fn_no_dependence ();
*last_conflicts = integer_zero_node;
dependence_stats.num_miv_independent++;
}
-
+
else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
&& !chrec_contains_symbols (chrec_a)
&& evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
/* testsuite/.../ssa-chrec-35.c
{0, +, 1}_2 vs. {0, +, 1}_3
the overlapping elements are respectively located at iterations:
- {0, +, 1}_x and {0, +, 1}_x,
- in other words, we have the equality:
+ {0, +, 1}_x and {0, +, 1}_x,
+ in other words, we have the equality:
{0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
-
- Other examples:
- {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
+
+ Other examples:
+ {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
{0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
- {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
+ {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
{{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
*/
- analyze_subscript_affine_affine (chrec_a, chrec_b,
+ analyze_subscript_affine_affine (chrec_a, chrec_b,
overlaps_a, overlaps_b, last_conflicts);
if (CF_NOT_KNOWN_P (*overlaps_a)
else
dependence_stats.num_miv_dependent++;
}
-
+
else
{
/* When the analysis is too difficult, answer "don't know". */
*last_conflicts = chrec_dont_know;
dependence_stats.num_miv_unimplemented++;
}
-
+
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, ")\n");
}
with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
OVERLAP_ITERATIONS_B are initialized with two functions that
describe the iterations that contain conflicting elements.
-
+
Remark: For an integer k >= 0, the following equality is true:
-
+
CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/
-static void
-analyze_overlapping_iterations (tree chrec_a,
- tree chrec_b,
- conflict_function **overlap_iterations_a,
- conflict_function **overlap_iterations_b,
+static void
+analyze_overlapping_iterations (tree chrec_a,
+ tree chrec_b,
+ conflict_function **overlap_iterations_a,
+ conflict_function **overlap_iterations_b,
tree *last_conflicts, struct loop *loop_nest)
{
unsigned int lnn = loop_nest->num;
dependence_stats.num_subscript_tests++;
-
+
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "(analyze_overlapping_iterations \n");
|| chrec_contains_undetermined (chrec_b))
{
dependence_stats.num_subscript_undetermined++;
-
+
*overlap_iterations_a = conflict_fn_not_known ();
*overlap_iterations_b = conflict_fn_not_known ();
}
- /* If they are the same chrec, and are affine, they overlap
+ /* If they are the same chrec, and are affine, they overlap
on every iteration. */
else if (eq_evolutions_p (chrec_a, chrec_b)
- && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
+ && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
+ || operand_equal_p (chrec_a, chrec_b, 0)))
{
dependence_stats.num_same_subscript_function++;
*overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
}
/* If they aren't the same, and aren't affine, we can't do anything
- yet. */
- else if ((chrec_contains_symbols (chrec_a)
+ yet. */
+ else if ((chrec_contains_symbols (chrec_a)
|| chrec_contains_symbols (chrec_b))
&& (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
|| !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
}
else if (ziv_subscript_p (chrec_a, chrec_b))
- analyze_ziv_subscript (chrec_a, chrec_b,
+ analyze_ziv_subscript (chrec_a, chrec_b,
overlap_iterations_a, overlap_iterations_b,
last_conflicts);
-
+
else if (siv_subscript_p (chrec_a, chrec_b))
- analyze_siv_subscript (chrec_a, chrec_b,
- overlap_iterations_a, overlap_iterations_b,
- last_conflicts);
-
+ analyze_siv_subscript (chrec_a, chrec_b,
+ overlap_iterations_a, overlap_iterations_b,
+ last_conflicts, lnn);
+
else
- analyze_miv_subscript (chrec_a, chrec_b,
+ analyze_miv_subscript (chrec_a, chrec_b,
overlap_iterations_a, overlap_iterations_b,
last_conflicts, loop_nest);
-
+
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, " (overlap_iterations_a = ");
unsigned i;
lambda_vector v;
- for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
+ FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
return;
unsigned i;
lambda_vector v;
- for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
+ FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
return;
access_fn_a = DR_ACCESS_FN (ddr_a, i);
access_fn_b = DR_ACCESS_FN (ddr_b, i);
- if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
+ if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
&& TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
{
int dist, index;
- int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
- DDR_LOOP_NEST (ddr));
- int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
- DDR_LOOP_NEST (ddr));
-
- /* The dependence is carried by the outermost loop. Example:
- | loop_1
- | A[{4, +, 1}_1]
- | loop_2
- | A[{5, +, 1}_2]
- | endloop_2
- | endloop_1
- In this case, the dependence is carried by loop_1. */
- index = index_a < index_b ? index_a : index_b;
- *index_carry = MIN (index, *index_carry);
+ int var_a = CHREC_VARIABLE (access_fn_a);
+ int var_b = CHREC_VARIABLE (access_fn_b);
- if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
+ if (var_a != var_b
+ || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
{
non_affine_dependence_relation (ddr);
return false;
}
-
+
dist = int_cst_value (SUB_DISTANCE (subscript));
+ index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
+ *index_carry = MIN (index, *index_carry);
/* This is the subscript coupling test. If we have already
recorded a distance for this loop (a distance coming from
return true;
}
-/* Return true when the DDR contains two data references that have the
- same access functions. */
-
-static bool
-same_access_functions (struct data_dependence_relation *ddr)
-{
- unsigned i;
-
- for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
- if (!eq_evolutions_p (DR_ACCESS_FN (DDR_A (ddr), i),
- DR_ACCESS_FN (DDR_B (ddr), i)))
- return false;
-
- return true;
-}
-
/* Return true when the DDR contains only constant access functions. */
static bool
-constant_access_functions (struct data_dependence_relation *ddr)
+constant_access_functions (const struct data_dependence_relation *ddr)
{
unsigned i;
return true;
}
-
/* Helper function for the case where DDR_A and DDR_B are the same
- multivariate access function. */
+ multivariate access function with a constant step. For an example
+ see pr34635-1.c. */
static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
return;
}
- add_multivariate_self_dist (ddr, DR_ACCESS_FN (DDR_A (ddr), 0));
+ access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
+
+ if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
+ add_multivariate_self_dist (ddr, access_fun);
+ else
+ /* The evolution step is not constant: it varies in
+ the outer loop, so this cannot be represented by a
+ distance vector. For example in pr34635.c the
+ evolution is {0, +, {0, +, 4}_1}_2. */
+ DDR_AFFINE_P (ddr) = false;
+
return;
}
lambda_vector dist_v;
if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
- return true;
+ return false;
if (same_access_functions (ddr))
{
if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
{
lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
- subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
- loop_nest);
+ if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
+ loop_nest))
+ return false;
compute_subscript_distance (ddr);
- build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
- save_v, &init_b, &index_carry);
+ if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
+ save_v, &init_b, &index_carry))
+ return false;
save_dist_v (ddr, save_v);
DDR_REVERSED_P (ddr) = true;
| T[j][i] = t + 2; // B
| }
- the vectors are:
+ the vectors are:
(0, 1, -1)
(1, 1, -1)
(1, -1, 1)
{
lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
- save_dist_v (ddr, save_v);
if (DDR_NB_LOOPS (ddr) > 1)
{
lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
- subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
- loop_nest);
+ if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
+ DDR_A (ddr), loop_nest))
+ return false;
compute_subscript_distance (ddr);
- build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
- opposite_v, &init_b, &index_carry);
+ if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
+ opposite_v, &init_b,
+ &index_carry))
+ return false;
+ save_dist_v (ddr, save_v);
add_outer_distances (ddr, dist_v, index_carry);
add_outer_distances (ddr, opposite_v, index_carry);
}
+ else
+ save_dist_v (ddr, save_v);
}
}
else
unsigned i, j;
lambda_vector dist_v;
- for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
+ FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
{
lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
{
conflict_function *overlaps_a, *overlaps_b;
- analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
+ analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
DR_ACCESS_FN (drb, i),
- &overlaps_a, &overlaps_b,
+ &overlaps_a, &overlaps_b,
&last_conflicts, loop_nest);
if (CF_NOT_KNOWN_P (overlaps_a)
else
{
+ if (SUB_CONFLICTS_IN_A (subscript))
+ free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
+ if (SUB_CONFLICTS_IN_B (subscript))
+ free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
+
SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
SUB_LAST_CONFLICT (subscript) = last_conflicts;
subscript_dependence_tester (struct data_dependence_relation *ddr,
struct loop *loop_nest)
{
-
+
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "(subscript_dependence_tester \n");
-
+
if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
dependence_stats.num_dependence_dependent++;
/* Returns true when all the access functions of A are affine or
constant with respect to LOOP_NEST. */
-static bool
-access_functions_are_affine_or_constant_p (struct data_reference *a,
- struct loop *loop_nest)
+static bool
+access_functions_are_affine_or_constant_p (const struct data_reference *a,
+ const struct loop *loop_nest)
{
unsigned int i;
VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
tree t;
- for (i = 0; VEC_iterate (tree, fns, i, t); i++)
+ FOR_EACH_VEC_ELT (tree, fns, i, t)
if (!evolution_function_is_invariant_p (t, loop_nest->num)
&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
return false;
-
+
return true;
}
ACCESS_FUN is expected to be an affine chrec. */
static bool
-init_omega_eq_with_af (omega_pb pb, unsigned eq,
- unsigned int offset, tree access_fun,
+init_omega_eq_with_af (omega_pb pb, unsigned eq,
+ unsigned int offset, tree access_fun,
struct data_dependence_relation *ddr)
{
switch (TREE_CODE (access_fun))
DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
if (offset == 0)
- pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
+ pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
+= int_cst_value (right);
switch (TREE_CODE (left))
/* Set a new problem for each loop in the nest. The basis is the
problem that we have initialized until now. On top of this we
add new constraints. */
- for (i = 0; i <= DDR_INNER_LOOP (ddr)
+ for (i = 0; i <= DDR_INNER_LOOP (ddr)
&& VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
{
int dist = 0;
/* Reduce the constraint system, and test that the current
problem is feasible. */
res = omega_simplify_problem (copy);
- if (res == omega_false
+ if (res == omega_false
|| res == omega_unknown
|| copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
goto next_problem;
copy->eqs[eq].coef[0] = -1;
res = omega_simplify_problem (copy);
- if (res == omega_false
+ if (res == omega_false
|| res == omega_unknown
|| copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
goto next_problem;
omega_pb pb, bool *maybe_dependent)
{
int eq;
- tree fun_a = chrec_convert (integer_type_node, access_fun_a, NULL_TREE);
- tree fun_b = chrec_convert (integer_type_node, access_fun_b, NULL_TREE);
- tree difference = chrec_fold_minus (integer_type_node, fun_a, fun_b);
+ tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
+ TREE_TYPE (access_fun_b));
+ tree fun_a = chrec_convert (type, access_fun_a, NULL);
+ tree fun_b = chrec_convert (type, access_fun_b, NULL);
+ tree difference = chrec_fold_minus (type, fun_a, fun_b);
+ tree minus_one;
/* When the fun_a - fun_b is not constant, the dependence is not
captured by the classic distance vector representation. */
return true;
}
- fun_b = chrec_fold_multiply (integer_type_node, fun_b,
- integer_minus_one_node);
+ minus_one = build_int_cst (type, -1);
+ fun_b = chrec_fold_multiply (type, fun_b, minus_one);
eq = omega_add_zero_eq (pb, omega_black);
if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
/* GCD test. */
if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
- && !int_divides_p (lambda_vector_gcd
+ && !int_divides_p (lambda_vector_gcd
((lambda_vector) &(pb->eqs[eq].coef[1]),
2 * DDR_NB_LOOPS (ddr)),
pb->eqs[eq].coef[0]))
removed by the solver: the "dx"
- coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
*/
- for (i = 0; i <= DDR_INNER_LOOP (ddr)
+ for (i = 0; i <= DDR_INNER_LOOP (ddr)
&& VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
{
- HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
+ HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
/* 0 <= loop_x */
ineq = omega_add_zero_geq (pb, omega_black);
set MAYBE_DEPENDENT to true.
Example: for setting up the dependence system corresponding to the
- conflicting accesses
+ conflicting accesses
| loop_i
| loop_j
| ... A[2*j, 2*(i + j)]
| endloop_j
| endloop_i
-
+
the following constraints come from the iteration domain:
0 <= i <= Ni
DDR_NUM_DIST_VECTS (ddr));
fprintf (file, "Banerjee dist vectors:\n");
- for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
+ FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
fprintf (file, "Omega dist vectors:\n");
/* Distance vectors are not ordered in the same way in the DDR
and in the DIST_VECTS: search for a matching vector. */
- for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
+ FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
break;
/* Direction vectors are not ordered in the same way in the DDR
and in the DIR_VECTS: search for a matching vector. */
- for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
+ FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
break;
}
}
- return true;
+ return true;
}
/* This computes the affine dependence relation between A and B with
respect to LOOP_NEST. CHREC_KNOWN is used for representing the
independence between two accesses, while CHREC_DONT_KNOW is used
for representing the unknown relation.
-
+
Note that it is possible to stop the computation of the dependence
relation the first time we detect a CHREC_KNOWN element for a given
subscript. */
{
struct data_reference *dra = DDR_A (ddr);
struct data_reference *drb = DDR_B (ddr);
-
+
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "(compute_affine_dependence\n");
fprintf (dump_file, " (stmt_a = \n");
- print_generic_expr (dump_file, DR_STMT (dra), 0);
+ print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
fprintf (dump_file, ")\n (stmt_b = \n");
- print_generic_expr (dump_file, DR_STMT (drb), 0);
+ print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
fprintf (dump_file, ")\n");
}
/* Analyze only when the dependence relation is not yet known. */
- if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
+ if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
+ && !DDR_SELF_REFERENCE (ddr))
{
dependence_stats.num_dependence_tests++;
else
subscript_dependence_tester (ddr, loop_nest);
}
-
+
/* As a last case, if the dependence cannot be determined, or if
the dependence is considered too difficult to determine, answer
"don't know". */
finalize_ddr_dependent (ddr, chrec_dont_know);
}
}
-
+
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, ")\n");
}
/* This computes the dependence relation for the same data
reference into DDR. */
-static void
+void
compute_self_dependence (struct data_dependence_relation *ddr)
{
unsigned int i;
for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
i++)
{
+ if (SUB_CONFLICTS_IN_A (subscript))
+ free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
+ if (SUB_CONFLICTS_IN_B (subscript))
+ free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
+
/* The accessed index overlaps for each iteration. */
SUB_CONFLICTS_IN_A (subscript)
- = conflict_fn (1, affine_fn_cst (integer_zero_node));
+ = conflict_fn (1, affine_fn_cst (integer_zero_node));
SUB_CONFLICTS_IN_B (subscript)
- = conflict_fn (1, affine_fn_cst (integer_zero_node));
+ = conflict_fn (1, affine_fn_cst (integer_zero_node));
SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
}
COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
relations. */
-void
+void
compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
VEC (ddr_p, heap) **dependence_relations,
VEC (loop_p, heap) *loop_nest,
struct data_reference *a, *b;
unsigned int i, j;
- for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
+ FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
- if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
+ if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
{
ddr = initialize_data_dependence_relation (a, b, loop_nest);
VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
- compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
+ if (loop_nest)
+ compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
}
if (compute_self_and_rr)
- for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
+ FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
{
ddr = initialize_data_dependence_relation (a, a, loop_nest);
VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
true if STMT clobbers memory, false otherwise. */
bool
-get_references_in_stmt (tree stmt, VEC (data_ref_loc, heap) **references)
+get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
{
bool clobbers_memory = false;
data_ref_loc *ref;
- tree *op0, *op1, call;
+ tree *op0, *op1;
+ enum gimple_code stmt_code = gimple_code (stmt);
*references = NULL;
/* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
Calls have side-effects, except those to const or pure
functions. */
- call = get_call_expr_in (stmt);
- if ((call
- && !(call_expr_flags (call) & (ECF_CONST | ECF_PURE)))
- || (TREE_CODE (stmt) == ASM_EXPR
- && ASM_VOLATILE_P (stmt)))
+ if ((stmt_code == GIMPLE_CALL
+ && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
+ || (stmt_code == GIMPLE_ASM
+ && gimple_asm_volatile_p (stmt)))
clobbers_memory = true;
- if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
+ if (!gimple_vuse (stmt))
return clobbers_memory;
- if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
+ if (stmt_code == GIMPLE_ASSIGN)
{
- op0 = &GIMPLE_STMT_OPERAND (stmt, 0);
- op1 = &GIMPLE_STMT_OPERAND (stmt, 1);
-
+ tree base;
+ op0 = gimple_assign_lhs_ptr (stmt);
+ op1 = gimple_assign_rhs1_ptr (stmt);
+
if (DECL_P (*op1)
- || REFERENCE_CLASS_P (*op1))
+ || (REFERENCE_CLASS_P (*op1)
+ && (base = get_base_address (*op1))
+ && TREE_CODE (base) != SSA_NAME))
{
ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
ref->pos = op1;
ref->is_read = true;
}
-
- if (DECL_P (*op0)
- || REFERENCE_CLASS_P (*op0))
- {
- ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
- ref->pos = op0;
- ref->is_read = false;
- }
}
-
- if (call)
+ else if (stmt_code == GIMPLE_CALL)
{
- unsigned i, n = call_expr_nargs (call);
+ unsigned i, n;
+ op0 = gimple_call_lhs_ptr (stmt);
+ n = gimple_call_num_args (stmt);
for (i = 0; i < n; i++)
{
- op0 = &CALL_EXPR_ARG (call, i);
+ op1 = gimple_call_arg_ptr (stmt, i);
- if (DECL_P (*op0)
- || REFERENCE_CLASS_P (*op0))
+ if (DECL_P (*op1)
+ || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
{
ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
- ref->pos = op0;
+ ref->pos = op1;
ref->is_read = true;
}
}
}
+ else
+ return clobbers_memory;
+ if (*op0
+ && (DECL_P (*op0)
+ || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
+ {
+ ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
+ ref->pos = op0;
+ ref->is_read = false;
+ }
return clobbers_memory;
}
/* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
reference, returns false, otherwise returns true. NEST is the outermost
- loop of the loop nest in that the references should be analyzed. */
+ loop of the loop nest in which the references should be analyzed. */
-static bool
-find_data_references_in_stmt (struct loop *nest, tree stmt,
+bool
+find_data_references_in_stmt (struct loop *nest, gimple stmt,
VEC (data_reference_p, heap) **datarefs)
{
unsigned i;
return false;
}
- for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
+ FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
{
- dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
+ dr = create_data_ref (nest, loop_containing_stmt (stmt),
+ *ref->pos, stmt, ref->is_read);
gcc_assert (dr != NULL);
-
- /* FIXME -- data dependence analysis does not work correctly for objects with
- invariant addresses. Let us fail here until the problem is fixed. */
- if (dr_address_invariant_p (dr))
- {
- free_data_ref (dr);
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "\tFAILED as dr address is invariant\n");
- ret = false;
- break;
- }
+ VEC_safe_push (data_reference_p, heap, *datarefs, dr);
+ }
+ VEC_free (data_ref_loc, heap, references);
+ return ret;
+}
+
+/* Stores the data references in STMT to DATAREFS. Returns false,
+ without creating any data reference, when get_references_in_stmt
+ reports that STMT clobbers memory; otherwise returns true.
+ NEST is the outermost loop of the loop nest in which the references
+ should be instantiated, LOOP is the loop in which the references
+ should be analyzed. */
+
+bool
+graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
+ VEC (data_reference_p, heap) **datarefs)
+{
+ unsigned i;
+ VEC (data_ref_loc, heap) *references;
+ data_ref_loc *ref;
+ bool ret = true;
+ data_reference_p dr;
+
+ if (get_references_in_stmt (stmt, &references))
+ {
+ VEC_free (data_ref_loc, heap, references);
+ return false;
+ }
+ FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
+ {
+ dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
+ gcc_assert (dr != NULL);
VEC_safe_push (data_reference_p, heap, *datarefs, dr);
}
+
VEC_free (data_ref_loc, heap, references);
return ret;
}
/* Search the data references in LOOP, and record the information into
DATAREFS. Returns chrec_dont_know when failing to analyze a
+ difficult case, returns NULL_TREE otherwise. */
+
+tree
+find_data_references_in_bb (struct loop *loop, basic_block bb,
+			    VEC (data_reference_p, heap) **datarefs)
+{
+  gimple_stmt_iterator gsi = gsi_start_bb (bb);
+
+  /* Walk the statements of BB in order, collecting their data
+     references into DATAREFS.  */
+  while (!gsi_end_p (gsi))
+    {
+      if (!find_data_references_in_stmt (loop, gsi_stmt (gsi), datarefs))
+	{
+	  /* The statement could not be analyzed: push a
+	     zero-initialized data reference and give up.  */
+	  struct data_reference *placeholder
+	    = XCNEW (struct data_reference);
+
+	  VEC_safe_push (data_reference_p, heap, *datarefs, placeholder);
+	  return chrec_dont_know;
+	}
+
+      gsi_next (&gsi);
+    }
+
+  return NULL_TREE;
+}
+
+/* Search the data references in LOOP, and record the information into
+ DATAREFS. Returns chrec_dont_know when failing to analyze a
difficult case, returns NULL_TREE otherwise.
TODO: This function should be made smarter so that it can handle address
arithmetic as if they were array accesses, etc. */
-static tree
+tree
find_data_references_in_loop (struct loop *loop,
VEC (data_reference_p, heap) **datarefs)
{
basic_block bb, *bbs;
unsigned int i;
- block_stmt_iterator bsi;
bbs = get_loop_body_in_dom_order (loop);
{
bb = bbs[i];
- for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
- {
- tree stmt = bsi_stmt (bsi);
-
- if (!find_data_references_in_stmt (loop, stmt, datarefs))
- {
- struct data_reference *res;
- res = XCNEW (struct data_reference);
- VEC_safe_push (data_reference_p, heap, *datarefs, res);
-
- free (bbs);
- return chrec_dont_know;
- }
- }
+ if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
+ {
+ free (bbs);
+ return chrec_dont_know;
+ }
}
free (bbs);
return true;
}
-/* Given a loop nest LOOP, the following vectors are returned:
- DATAREFS is initialized to all the array elements contained in this loop,
- DEPENDENCE_RELATIONS contains the relations between the data references.
- Compute read-read and self relations if
+/* Returns true when the data dependences have been computed, false otherwise.
+ Given a loop nest LOOP, the following vectors are returned:
+ DATAREFS is initialized to all the array elements contained in this loop,
+ DEPENDENCE_RELATIONS contains the relations between the data references.
+ Compute read-read and self relations if
COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
-void
-compute_data_dependences_for_loop (struct loop *loop,
+bool
+compute_data_dependences_for_loop (struct loop *loop,
bool compute_self_and_read_read_dependences,
+ VEC (loop_p, heap) **loop_nest,
VEC (data_reference_p, heap) **datarefs,
VEC (ddr_p, heap) **dependence_relations)
{
- VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
+ bool res = true;
memset (&dependence_stats, 0, sizeof (dependence_stats));
- /* If the loop nest is not well formed, or one of the data references
+ /* If the loop nest is not well formed, or one of the data references
is not computable, give up without spending time to compute other
dependences. */
if (!loop
- || !find_loop_nest (loop, &vloops)
+ || !find_loop_nest (loop, loop_nest)
|| find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
{
struct data_dependence_relation *ddr;
/* Insert a single relation into dependence_relations:
chrec_dont_know. */
- ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
+ ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
+ res = false;
}
else
- compute_all_dependences (*datarefs, dependence_relations, vloops,
+ compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
compute_self_and_read_read_dependences);
if (dump_file && (dump_flags & TDF_STATS))
{
fprintf (dump_file, "Dependence tester statistics:\n");
- fprintf (dump_file, "Number of dependence tests: %d\n",
+ fprintf (dump_file, "Number of dependence tests: %d\n",
dependence_stats.num_dependence_tests);
- fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
+ fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
dependence_stats.num_dependence_dependent);
- fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
+ fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
dependence_stats.num_dependence_independent);
- fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
+ fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
dependence_stats.num_dependence_undetermined);
- fprintf (dump_file, "Number of subscript tests: %d\n",
+ fprintf (dump_file, "Number of subscript tests: %d\n",
dependence_stats.num_subscript_tests);
- fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
+ fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
dependence_stats.num_subscript_undetermined);
- fprintf (dump_file, "Number of same subscript function: %d\n",
+ fprintf (dump_file, "Number of same subscript function: %d\n",
dependence_stats.num_same_subscript_function);
fprintf (dump_file, "Number of ziv tests: %d\n",
fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
dependence_stats.num_ziv_independent);
fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
- dependence_stats.num_ziv_unimplemented);
+ dependence_stats.num_ziv_unimplemented);
- fprintf (dump_file, "Number of siv tests: %d\n",
+ fprintf (dump_file, "Number of siv tests: %d\n",
dependence_stats.num_siv);
fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
dependence_stats.num_siv_dependent);
fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
dependence_stats.num_siv_unimplemented);
- fprintf (dump_file, "Number of miv tests: %d\n",
+ fprintf (dump_file, "Number of miv tests: %d\n",
dependence_stats.num_miv);
fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
dependence_stats.num_miv_dependent);
dependence_stats.num_miv_independent);
fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
dependence_stats.num_miv_unimplemented);
- }
+ }
+
+ return res;
+}
+
+/* Returns true when the data dependences for the basic block BB have been
+ computed, false otherwise.
+ DATAREFS is initialized to all the array elements contained in this basic
+ block, DEPENDENCE_RELATIONS contains the relations between the data
+ references. Compute read-read and self relations if
+ COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.
+ No loop information is used: NULL is passed both as the loop to
+ find_data_references_in_bb and as the loop nest to
+ compute_all_dependences. When false is returned,
+ DEPENDENCE_RELATIONS has not been touched. */
+bool
+compute_data_dependences_for_bb (basic_block bb,
+ bool compute_self_and_read_read_dependences,
+ VEC (data_reference_p, heap) **datarefs,
+ VEC (ddr_p, heap) **dependence_relations)
+{
+ if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
+ return false;
+
+ compute_all_dependences (*datarefs, dependence_relations, NULL,
+ compute_self_and_read_read_dependences);
+ return true;
}
/* Entry point (for testing only). Analyze all the data references
and the dependence relations in LOOP.
- The data references are computed first.
-
+ The data references are computed first.
+
A relation on these nodes is represented by a complete graph. Some
of the relations could be of no interest, thus the relations can be
computed on demand.
-
+
In the following function we compute all the relations. This is
just a first implementation that is here for:
- - for showing how to ask for the dependence relations,
+ - for showing how to ask for the dependence relations,
- for the debugging the whole dependence graph,
- for the dejagnu testcases and maintenance.
-
+
It is possible to ask only for a part of the graph, avoiding to
compute the whole dependence graph. The computed dependences are
stored in a knowledge base (KB) such that later queries don't
recompute the same information. The implementation of this KB is
transparent to the optimizer, and thus the KB can be changed with a
more efficient implementation, or the KB could be disabled. */
-static void
+static void
analyze_all_data_dependences (struct loop *loop)
{
unsigned int i;
int nb_data_refs = 10;
- VEC (data_reference_p, heap) *datarefs =
+ VEC (data_reference_p, heap) *datarefs =
VEC_alloc (data_reference_p, heap, nb_data_refs);
- VEC (ddr_p, heap) *dependence_relations =
+ VEC (ddr_p, heap) *dependence_relations =
VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
+ VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
/* Compute DDs on the whole function. */
- compute_data_dependences_for_loop (loop, false, &datarefs,
+ compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
&dependence_relations);
if (dump_file)
{
unsigned nb_top_relations = 0;
unsigned nb_bot_relations = 0;
- unsigned nb_basename_differ = 0;
unsigned nb_chrec_relations = 0;
struct data_dependence_relation *ddr;
- for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
+ FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
{
if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
nb_top_relations++;
-
+
else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
- {
- struct data_reference *a = DDR_A (ddr);
- struct data_reference *b = DDR_B (ddr);
+ nb_bot_relations++;
- if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
- nb_basename_differ++;
- else
- nb_bot_relations++;
- }
-
- else
+ else
nb_chrec_relations++;
}
-
+
gather_stats_on_scev_database ();
}
}
+ VEC_free (loop_p, heap, loop_nest);
free_dependence_relations (dependence_relations);
free_data_refs (datarefs);
}
if (ddr == NULL)
return;
- if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_SUBSCRIPTS (ddr))
+ if (DDR_SUBSCRIPTS (ddr))
free_subscripts (DDR_SUBSCRIPTS (ddr));
+ if (DDR_DIST_VECTS (ddr))
+ VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
+ if (DDR_DIR_VECTS (ddr))
+ VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
free (ddr);
}
/* Free the memory used by the data dependence relations from
DEPENDENCE_RELATIONS. */
-void
+void
free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
{
unsigned int i;
struct data_dependence_relation *ddr;
- VEC (loop_p, heap) *loop_nest = NULL;
- for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
- {
- if (ddr == NULL)
- continue;
- if (loop_nest == NULL)
- loop_nest = DDR_LOOP_NEST (ddr);
- else
- gcc_assert (DDR_LOOP_NEST (ddr) == NULL
- || DDR_LOOP_NEST (ddr) == loop_nest);
+ FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
+ if (ddr)
free_dependence_relation (ddr);
- }
- if (loop_nest)
- VEC_free (loop_p, heap, loop_nest);
VEC_free (ddr_p, heap, dependence_relations);
}
unsigned int i;
struct data_reference *dr;
- for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
+ FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
free_data_ref (dr);
VEC_free (data_reference_p, heap, datarefs);
}
\f
-/* Returns the index of STMT in RDG. */
+/* Dump vertex I in RDG to FILE. */
-static int
-find_vertex_for_stmt (struct graph *rdg, tree stmt)
+void
+dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
+{
+  struct vertex *vertex = &rdg->vertices[i];
+  struct graph_edge *edge;
+  const char *writes = RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "";
+  const char *reads = RDG_MEM_READS_STMT (rdg, i) ? "r" : "";
+
+  /* Header: vertex index and its memory write/read flags.  */
+  fprintf (file, "(vertex %d: (%s%s) (in:", i, writes, reads);
+
+  /* Source vertices of the incoming edges.  */
+  edge = vertex->pred;
+  while (edge)
+    {
+      fprintf (file, " %d", edge->src);
+      edge = edge->pred_next;
+    }
+
+  fprintf (file, ") (out:");
+
+  /* Destination vertices of the outgoing edges.  */
+  edge = vertex->succ;
+  while (edge)
+    {
+      fprintf (file, " %d", edge->dest);
+      edge = edge->succ_next;
+    }
+
+  fprintf (file, ")\n");
+
+  /* Finally the statement attached to the vertex.  */
+  print_gimple_stmt (file, RDGV_STMT (vertex), 0, TDF_VOPS|TDF_MEMSYMS);
+  fprintf (file, ")\n");
+}
+
+/* Dump vertex I of RDG to stderr by calling dump_rdg_vertex. */
+
+DEBUG_FUNCTION void
+debug_rdg_vertex (struct graph *rdg, int i)
+{
+ dump_rdg_vertex (stderr, rdg, i);
+}
+
+/* Print to FILE every vertex of RDG whose component number is C,
+   enclosed in "(C" ... ")".  When DUMPED is non-null, the bit of each
+   printed vertex is set in it.  */
+
+void
+dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
+{
+  int v;
+
+  fprintf (file, "(%d\n", c);
+
+  for (v = 0; v < rdg->n_vertices; v++)
+    {
+      /* Skip the vertices of the other components.  */
+      if (rdg->vertices[v].component != c)
+	continue;
+
+      if (dumped)
+	bitmap_set_bit (dumped, v);
+
+      dump_rdg_vertex (file, rdg, v);
+    }
+
+  fprintf (file, ")\n");
+}
+
+/* Call dump_rdg_component on stderr, without recording the dumped
+ vertices (a NULL bitmap is passed). */
+
+DEBUG_FUNCTION void
+debug_rdg_component (struct graph *rdg, int c)
+{
+ dump_rdg_component (stderr, rdg, c, NULL);
+}
+
+/* Dump the reduced dependence graph RDG to FILE. */
+
+void
+dump_rdg (FILE *file, struct graph *rdg)
{
int i;
+ bitmap dumped = BITMAP_ALLOC (NULL);
+
+ fprintf (file, "(rdg\n");
for (i = 0; i < rdg->n_vertices; i++)
- if (RDGV_STMT (&(rdg->vertices[i])) == stmt)
- return i;
+ if (!bitmap_bit_p (dumped, i))
+ dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
- gcc_unreachable ();
- return 0;
+ fprintf (file, ")\n");
+ BITMAP_FREE (dumped);
}
-/* Creates an edge in RDG for each distance vector from DDR. */
+/* Dump the whole reduced dependence graph RDG to stderr by calling
+ dump_rdg. */
+
+DEBUG_FUNCTION void
+debug_rdg (struct graph *rdg)
+{
+ dump_rdg (stderr, rdg);
+}
static void
-create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
+dot_rdg_1 (FILE *file, struct graph *rdg)
{
- int va, vb;
- data_reference_p dra;
- data_reference_p drb;
- struct graph_edge *e;
+ int i;
- if (DDR_REVERSED_P (ddr))
+ fprintf (file, "digraph RDG {\n");
+
+ for (i = 0; i < rdg->n_vertices; i++)
{
- dra = DDR_B (ddr);
- drb = DDR_A (ddr);
+ struct vertex *v = &(rdg->vertices[i]);
+ struct graph_edge *e;
+
+ /* Highlight reads from memory. */
+ if (RDG_MEM_READS_STMT (rdg, i))
+ fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
+
+ /* Highlight stores to memory. */
+ if (RDG_MEM_WRITE_STMT (rdg, i))
+ fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
+
+ if (v->succ)
+ for (e = v->succ; e; e = e->succ_next)
+ switch (RDGE_TYPE (e))
+ {
+ case input_dd:
+ fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
+ break;
+
+ case output_dd:
+ fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
+ break;
+
+ case flow_dd:
+ /* These are the most common dependences: print the edge without a label. */
+ fprintf (file, "%d -> %d \n", i, e->dest);
+ break;
+
+ case anti_dd:
+ fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
}
- else
+
+ fprintf (file, "}\n\n");
+}
+
+/* Emit the Reduced Dependence Graph RDG in dot format. As compiled,
+ the graph is written to stderr; the disabled branch below instead
+ writes it to /tmp/rdg.dot and displays it using dotty. */
+extern void dot_rdg (struct graph *);
+
+DEBUG_FUNCTION void
+dot_rdg (struct graph *rdg)
+{
+ /* When debugging, enable the following code. This cannot be used
+ in production compilers because it calls "system". */
+#if 0
+ FILE *file = fopen ("/tmp/rdg.dot", "w");
+ gcc_assert (file != NULL);
+
+ dot_rdg_1 (file, rdg);
+ fclose (file);
+
+ system ("dotty /tmp/rdg.dot &");
+#else
+ dot_rdg_1 (stderr, rdg);
+#endif
+}
+
+/* This structure is used for recording the mapping statement index in
+ the RDG. Entries are stored in the RDG's indices hash table; the
+ hash and equality callbacks of that table look only at STMT. */
+
+struct GTY(()) rdg_vertex_info
+{
+ gimple stmt; /* The statement; serves as the lookup key. */
+ int index; /* Index of the vertex holding STMT in the RDG. */
+};
+
+/* Look STMT up in the index table of RDG.  Returns the vertex index
+   recorded for STMT, or -1 when the table has no entry for it.  */
+
+int
+rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
+{
+  struct rdg_vertex_info key;
+  struct rdg_vertex_info *found;
+
+  /* Only the statement is needed for the lookup; the index field of
+     KEY is left unset.  */
+  key.stmt = stmt;
+  found = (struct rdg_vertex_info *) htab_find (rdg->indices, &key);
+
+  return found ? found->index : -1;
+}
+
+/* Creates an edge in RDG for each distance vector from DDR. The
+ order that we keep track of in the RDG is the order in which
+ statements have to be executed. */
+
+static void
+create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
+{
+ struct graph_edge *e;
+ int va, vb;
+ data_reference_p dra = DDR_A (ddr);
+ data_reference_p drb = DDR_B (ddr);
+ unsigned level = ddr_dependence_level (ddr);
+
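+ /* For non scalar dependences, when the dependence is REVERSED,
+ statement B has to be executed before statement A.
+ NOTE(review): the swap below is performed when the relation is
+ NOT reversed -- confirm that this matches the intended edge
+ orientation. */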
+ if (level > 0
+ && !DDR_REVERSED_P (ddr))
{
- dra = DDR_A (ddr);
- drb = DDR_B (ddr);
+ data_reference_p tmp = dra;
+ dra = drb;
+ drb = tmp;
}
- va = find_vertex_for_stmt (rdg, DR_STMT (dra));
- vb = find_vertex_for_stmt (rdg, DR_STMT (drb));
+ va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
+ vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
+
+ if (va < 0 || vb < 0)
+ return;
e = add_edge (rdg, va, vb);
e->data = XNEW (struct rdg_edge);
+ RDGE_LEVEL (e) = level;
+ RDGE_RELATION (e) = ddr;
+
/* Determines the type of the data dependence. */
if (DR_IS_READ (dra) && DR_IS_READ (drb))
RDGE_TYPE (e) = input_dd;
- else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
+ else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
RDGE_TYPE (e) = output_dd;
- else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
+ else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
RDGE_TYPE (e) = flow_dd;
- else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
+ else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
RDGE_TYPE (e) = anti_dd;
}
{
use_operand_p imm_use_p;
imm_use_iterator iterator;
-
+
FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
{
- int use = find_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
- struct graph_edge *e = add_edge (rdg, idef, use);
+ struct graph_edge *e;
+ int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
+
+ if (use < 0)
+ continue;
+ e = add_edge (rdg, idef, use);
e->data = XNEW (struct rdg_edge);
RDGE_TYPE (e) = flow_dd;
+ RDGE_RELATION (e) = NULL;
}
}
def_operand_p def_p;
ssa_op_iter iter;
- for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
+ FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
create_rdg_edge_for_ddr (rdg, ddr);
for (i = 0; i < rdg->n_vertices; i++)
- FOR_EACH_PHI_OR_STMT_DEF (def_p, RDGV_STMT (&(rdg->vertices[i])),
- iter, SSA_OP_ALL_DEFS)
+ FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
+ iter, SSA_OP_DEF)
create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
/* Build the vertices of the reduced dependence graph RDG. */
-static void
-create_rdg_vertices (struct graph *rdg, VEC (tree, heap) *stmts)
+void
+create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
{
- int i;
- tree s;
+ int i, j;
+ gimple stmt;
- for (i = 0; VEC_iterate (tree, stmts, i, s); i++)
+ FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
{
+ VEC (data_ref_loc, heap) *references;
+ data_ref_loc *ref;
struct vertex *v = &(rdg->vertices[i]);
+ struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
+ struct rdg_vertex_info **slot;
+
+ rvi->stmt = stmt;
+ rvi->index = i;
+ slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
+
+ if (!*slot)
+ *slot = rvi;
+ else
+ free (rvi);
v->data = XNEW (struct rdg_vertex);
- RDGV_STMT (v) = s;
+ RDG_STMT (rdg, i) = stmt;
+
+ RDG_MEM_WRITE_STMT (rdg, i) = false;
+ RDG_MEM_READS_STMT (rdg, i) = false;
+ if (gimple_code (stmt) == GIMPLE_PHI)
+ continue;
+
+ get_references_in_stmt (stmt, &references);
+ FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
+ if (!ref->is_read)
+ RDG_MEM_WRITE_STMT (rdg, i) = true;
+ else
+ RDG_MEM_READS_STMT (rdg, i) = true;
+
+ VEC_free (data_ref_loc, heap, references);
}
}
-/* Initialize STMTS with all the statements and PHI nodes of LOOP. */
+/* Initialize STMTS with the statements of LOOP: for each basic
+ block, first its PHI nodes, then its other statements except labels
+ and debug statements. The order in
+ which we discover statements is important as
+ generate_loops_for_partition is using the same traversal for
+ identifying statements. */
static void
-stmts_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
+stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
{
unsigned int i;
basic_block *bbs = get_loop_body_in_dom_order (loop);
for (i = 0; i < loop->num_nodes; i++)
{
- tree phi;
basic_block bb = bbs[i];
- block_stmt_iterator bsi;
+ gimple_stmt_iterator bsi;
+ gimple stmt;
- for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
- VEC_safe_push (tree, heap, *stmts, phi);
+ for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
+ VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
- for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
- VEC_safe_push (tree, heap, *stmts, bsi_stmt (bsi));
+ for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
+ {
+ stmt = gsi_stmt (bsi);
+ if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
+ VEC_safe_push (gimple, heap, *stmts, stmt);
+ }
}
free (bbs);
ddr_p ddr;
unsigned int i;
- for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
+ FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
return false;
-
+
return true;
}
-/* Build a Reduced Dependence Graph with one vertex per statement of the
- loop nest and one edge per data dependence or scalar dependence. */
+/* Hash callback for rdg_vertex_info elements: the hash of ELT is the
+   pointer hash of the statement it records.  */
+
+static hashval_t
+hash_stmt_vertex_info (const void *elt)
+{
+  const struct rdg_vertex_info *info = (const struct rdg_vertex_info *) elt;
+
+  return htab_hash_pointer (info->stmt);
+}
+
+/* Equality callback for rdg_vertex_info elements: E1 and E2 are equal
+   when they record the same statement.  */
+
+static int
+eq_stmt_vertex_info (const void *e1, const void *e2)
+{
+  gimple stmt1 = ((const struct rdg_vertex_info *) e1)->stmt;
+  gimple stmt2 = ((const struct rdg_vertex_info *) e2)->stmt;
+
+  return stmt1 == stmt2;
+}
+
+/* Free the element E. Installed by build_empty_rdg as the delete
+ callback of the RDG's indices hash table. */
+
+static void
+hash_stmt_vertex_del (void *e)
+{
+ free (e);
+}
+
+/* Create a Reduced Dependence Graph (RDG) for N_STMTS statements by
+ calling new_graph, and attach to it an empty hash table that maps
+ statements to vertex indices. */
struct graph *
-build_rdg (struct loop *loop)
+build_empty_rdg (int n_stmts)
{
int nb_data_refs = 10;
+ struct graph *rdg = new_graph (n_stmts);
+
+ rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
+ eq_stmt_vertex_info, hash_stmt_vertex_del);
+ return rdg;
+}
+
+/* Build the Reduced Dependence Graph (RDG) with one vertex per
+ statement of the loop nest, and one edge per data dependence or
+ scalar dependence. LOOP_NEST, DATAREFS and DEPENDENCE_RELATIONS
+ are handed to compute_data_dependences_for_loop for LOOP and are
+ not freed by this function. Returns NULL when known_dependences_p
+ is false for the computed relations; otherwise returns the graph
+ filled by create_rdg_vertices and create_rdg_edges. */
+
+struct graph *
+build_rdg (struct loop *loop,
+ VEC (loop_p, heap) **loop_nest,
+ VEC (ddr_p, heap) **dependence_relations,
+ VEC (data_reference_p, heap) **datarefs)
+{
struct graph *rdg = NULL;
- VEC (ddr_p, heap) *dependence_relations;
- VEC (data_reference_p, heap) *datarefs;
- VEC (tree, heap) *stmts = VEC_alloc (tree, heap, 10);
-
- dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
- datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
- compute_data_dependences_for_loop (loop,
- false,
- &datarefs,
- &dependence_relations);
-
- if (!known_dependences_p (dependence_relations))
- goto end_rdg;
-
- stmts_from_loop (loop, &stmts);
- rdg = new_graph (VEC_length (tree, stmts));
- create_rdg_vertices (rdg, stmts);
- create_rdg_edges (rdg, dependence_relations);
-
- end_rdg:
- free_dependence_relations (dependence_relations);
- free_data_refs (datarefs);
- VEC_free (tree, heap, stmts);
+ VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
+ compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
+ dependence_relations);
+
+ if (known_dependences_p (*dependence_relations)) /* Otherwise RDG stays NULL. */
+ {
+ stmts_from_loop (loop, &stmts);
+ rdg = build_empty_rdg (VEC_length (gimple, stmts));
+ create_rdg_vertices (rdg, stmts);
+ create_rdg_edges (rdg, *dependence_relations);
+ }
+
+ VEC_free (gimple, heap, stmts); /* The local statement vector is released on every path. */
return rdg;
}
+
+/* Release the reduced dependence graph RDG: the DATA attached to each
+   successor edge and to each vertex, then the INDICES hash table, and
+   finally the graph itself.  */
+
+void
+free_rdg (struct graph *rdg)
+{
+  int vi = 0;
+
+  while (vi < rdg->n_vertices)
+    {
+      struct vertex *vertex = rdg->vertices + vi;
+      struct graph_edge *edge = vertex->succ;
+
+      /* Walk the successor list of this vertex.  */
+      while (edge)
+        {
+          free (edge->data);
+          edge = edge->succ_next;
+        }
+
+      free (vertex->data);
+      vi++;
+    }
+
+  htab_delete (rdg->indices);
+  free_graph (rdg);
+}
+
+/* Push onto STMTS every statement of LOOP for which gimple_vdef is
+   non-null, i.e. the statements that contain a store to memory.  The
+   basic blocks are visited in the order returned by
+   get_loop_body_in_dom_order.  */
+
+void
+stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
+{
+  basic_block *body = get_loop_body_in_dom_order (loop);
+  unsigned int n;
+
+  for (n = 0; n < loop->num_nodes; n++)
+    {
+      gimple_stmt_iterator gsi = gsi_start_bb (body[n]);
+
+      while (!gsi_end_p (gsi))
+        {
+          gimple stmt = gsi_stmt (gsi);
+
+          if (gimple_vdef (stmt))
+            VEC_safe_push (gimple, heap, *stmts, stmt);
+
+          gsi_next (&gsi);
+        }
+    }
+
+  free (body);
+}
+
+/* Return true when STMT has the form "A[i] = 0": a single assignment
+   with a virtual definition whose right-hand side is an integer or
+   real zero, and whose left-hand side is a data reference with a
+   stride of the same size as its unit type.  A null STMT yields
+   false.  */
+
+bool
+stmt_with_adjacent_zero_store_dr_p (gimple stmt)
+{
+  tree lhs, rhs;
+  struct data_reference *dr;
+  bool adjacent_p;
+
+  if (!stmt || !gimple_vdef (stmt))
+    return false;
+
+  if (!is_gimple_assign (stmt) || !gimple_assign_single_p (stmt))
+    return false;
+
+  rhs = gimple_assign_rhs1 (stmt);
+  if (!rhs)
+    return false;
+
+  if (!integer_zerop (rhs) && !real_zerop (rhs))
+    return false;
+
+  /* Analyze the stored-to reference with a temporary data reference.  */
+  lhs = gimple_assign_lhs (stmt);
+  dr = XCNEW (struct data_reference);
+  DR_STMT (dr) = stmt;
+  DR_REF (dr) = lhs;
+
+  /* The step is only inspected when the innermost analysis succeeds.  */
+  adjacent_p = false;
+  if (dr_analyze_innermost (dr, loop_containing_stmt (stmt)))
+    adjacent_p = stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (lhs));
+
+  free_data_ref (dr);
+  return adjacent_p;
+}
+
+/* Push onto STMTS every statement of LOOP accepted by
+   stmt_with_adjacent_zero_store_dr_p, i.e. the stores to memory of
+   the form "A[i] = 0".  The basic blocks are visited in the order
+   returned by get_loop_body_in_dom_order.  */
+
+void
+stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
+{
+  basic_block *body = get_loop_body_in_dom_order (loop);
+  unsigned int n;
+
+  for (n = 0; n < loop->num_nodes; n++)
+    {
+      gimple_stmt_iterator gsi;
+
+      for (gsi = gsi_start_bb (body[n]); !gsi_end_p (gsi); gsi_next (&gsi))
+        {
+          gimple stmt = gsi_stmt (gsi);
+
+          if (stmt && stmt_with_adjacent_zero_store_dr_p (stmt))
+            VEC_safe_push (gimple, heap, *stmts, stmt);
+        }
+    }
+
+  free (body);
+}
+
+/* For the data reference REF found in STMT, return the base of its
+   address: the operand of the ADDR_EXPR when the base address is one,
+   the base address itself otherwise, or NULL_TREE when the analysis
+   leaves no base address.  */
+
+static inline tree
+ref_base_address (gimple stmt, data_ref_loc *ref)
+{
+  struct data_reference *dr = XCNEW (struct data_reference);
+  tree addr;
+  tree result = NULL_TREE;
+
+  DR_STMT (dr) = stmt;
+  DR_REF (dr) = *ref->pos;
+  dr_analyze_innermost (dr, loop_containing_stmt (stmt));
+
+  addr = DR_BASE_ADDRESS (dr);
+  if (addr)
+    result = (TREE_CODE (addr) == ADDR_EXPR
+              ? TREE_OPERAND (addr, 0)
+              : addr);
+
+  /* The temporary data reference is released on every path.  */
+  free_data_ref (dr);
+  return result;
+}
+
+/* Return true when the statement of vertex V in RDG defines a value
+   with an immediate use whose statement is not in the loop that
+   contains the defining statement.  Also return true when the
+   defining statement has no containing loop.  */
+
+bool
+rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
+{
+  gimple def_stmt = RDG_STMT (rdg, v);
+  struct loop *def_loop = loop_containing_stmt (def_stmt);
+  ssa_op_iter op_iter;
+  def_operand_p def;
+  imm_use_iterator use_iter;
+  use_operand_p use;
+
+  if (def_loop == NULL)
+    return true;
+
+  FOR_EACH_PHI_OR_STMT_DEF (def, def_stmt, op_iter, SSA_OP_DEF)
+    FOR_EACH_IMM_USE_FAST (use, use_iter, DEF_FROM_PTR (def))
+      if (loop_containing_stmt (USE_STMT (use)) != def_loop)
+        return true;
+
+  return false;
+}
+
+/* Return true when statements S1 and S2 access similar memory
+   locations.  Two accesses are similar when ref_base_address gives
+   the same non-null base for a reference of S1 and a reference of
+   S2.  */
+
+bool
+have_similar_memory_accesses (gimple s1, gimple s2)
+{
+  VEC (data_ref_loc, heap) *refs1, *refs2;
+  data_ref_loc *r1, *r2;
+  unsigned ix1, ix2;
+  bool similar_p = false;
+
+  get_references_in_stmt (s1, &refs1);
+  get_references_in_stmt (s2, &refs2);
+
+  FOR_EACH_VEC_ELT (data_ref_loc, refs1, ix1, r1)
+    {
+      tree base1 = ref_base_address (s1, r1);
+
+      /* A reference without a base cannot match anything.  */
+      if (!base1)
+        continue;
+
+      FOR_EACH_VEC_ELT (data_ref_loc, refs2, ix2, r2)
+        if (ref_base_address (s2, r2) == base1)
+          {
+            similar_p = true;
+            break;
+          }
+
+      /* Stop at the first match.  */
+      if (similar_p)
+        break;
+    }
+
+  VEC_free (data_ref_loc, heap, refs1);
+  VEC_free (data_ref_loc, heap, refs2);
+  return similar_p;
+}
+
+/* Hash table equality callback: S1 and S2 are statements passed as
+   untyped pointers; forward them to have_similar_memory_accesses.  */
+
+static int
+have_similar_memory_accesses_1 (const void *s1, const void *s2)
+{
+  gimple stmt1 = CONST_CAST_GIMPLE ((const_gimple) s1);
+  gimple stmt2 = CONST_CAST_GIMPLE ((const_gimple) s2);
+
+  return have_similar_memory_accesses (stmt1, stmt2);
+}
+
+/* Hash table hash callback: S is a statement passed as an untyped
+   pointer.  The hash is that of the ref_base_address of the first
+   reference in the statement that is not a read, or 0 when the
+   statement has no such reference.  */
+
+static hashval_t
+ref_base_address_1 (const void *s)
+{
+  gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
+  VEC (data_ref_loc, heap) *refs;
+  data_ref_loc *ref;
+  hashval_t hash = 0;
+  unsigned ix;
+
+  get_references_in_stmt (stmt, &refs);
+
+  FOR_EACH_VEC_ELT (data_ref_loc, refs, ix, ref)
+    {
+      if (ref->is_read)
+        continue;
+
+      /* Only the first write reference contributes to the hash.  */
+      hash = htab_hash_pointer (ref_base_address (stmt, ref));
+      break;
+    }
+
+  VEC_free (data_ref_loc, heap, refs);
+  return hash;
+}
+
+/* Try to remove duplicated write data references from STMTS: a
+   statement is dropped from the vector when a statement already kept
+   compares equal to it in a hash table keyed by ref_base_address_1 and
+   compared with have_similar_memory_accesses_1.  */
+
+void
+remove_similar_memory_refs (VEC (gimple, heap) **stmts)
+{
+  htab_t visited = htab_create (VEC_length (gimple, *stmts),
+                                ref_base_address_1,
+                                have_similar_memory_accesses_1, NULL);
+  unsigned ix = 0;
+  gimple stmt;
+
+  while (VEC_iterate (gimple, *stmts, ix, stmt))
+    {
+      void **slot = htab_find_slot (visited, stmt, INSERT);
+
+      if (*slot == NULL)
+        {
+          /* Nothing similar recorded yet: keep the statement.  */
+          *slot = (void *) stmt;
+          ix++;
+        }
+      else
+        /* A similar statement is already kept.  The index is not
+           advanced, so the same position is examined again.  */
+        VEC_ordered_remove (gimple, *stmts, ix);
+    }
+
+  htab_delete (visited);
+}
+
+/* Return the index of PARAMETER in ACCESS_MATRIX: its position in the
+   AM_PARAMETERS vector, offset by AM_NB_INDUCTION_VARS.  Return -1
+   when PARAMETER is not in that vector.  */
+
+int
+access_matrix_get_index_for_parameter (tree parameter,
+                                       struct access_matrix *access_matrix)
+{
+  VEC (tree,heap) *params = AM_PARAMETERS (access_matrix);
+  tree candidate;
+  int pos;
+
+  FOR_EACH_VEC_ELT (tree, params, pos, candidate)
+    {
+      if (candidate != parameter)
+        continue;
+
+      return AM_NB_INDUCTION_VARS (access_matrix) + pos;
+    }
+
+  return -1;
+}