1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
50 - to define an interface to access this data.
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
63 has an integer solution x = 1 and y = -1.
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
89 static struct datadep_stats
91 int num_dependence_tests;
92 int num_dependence_dependent;
93 int num_dependence_independent;
94 int num_dependence_undetermined;
96 int num_subscript_tests;
97 int num_subscript_undetermined;
98 int num_same_subscript_function;
101 int num_ziv_independent;
102 int num_ziv_dependent;
103 int num_ziv_unimplemented;
106 int num_siv_independent;
107 int num_siv_dependent;
108 int num_siv_unimplemented;
111 int num_miv_independent;
112 int num_miv_dependent;
113 int num_miv_unimplemented;
116 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
117 struct data_reference *,
118 struct data_reference *,
120 /* Returns true iff A divides B. */
123 tree_fold_divides_p (const_tree a, const_tree b)
125 gcc_assert (TREE_CODE (a) == INTEGER_CST);
126 gcc_assert (TREE_CODE (b) == INTEGER_CST);
127 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
130 /* Returns true iff A divides B. */
133 int_divides_p (int a, int b)
135 return ((b % a) == 0);
140 /* Dump into FILE all the data references from DATAREFS. */
143 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
146 struct data_reference *dr;
148 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
149 dump_data_reference (file, dr);
152 /* Dump into STDERR all the data references from DATAREFS. */
155 debug_data_references (VEC (data_reference_p, heap) *datarefs)
157 dump_data_references (stderr, datarefs);
160 /* Dump to STDERR all the dependence relations from DDRS. */
163 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
165 dump_data_dependence_relations (stderr, ddrs);
168 /* Dump into FILE all the dependence relations from DDRS. */
171 dump_data_dependence_relations (FILE *file,
172 VEC (ddr_p, heap) *ddrs)
175 struct data_dependence_relation *ddr;
177 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
178 dump_data_dependence_relation (file, ddr);
181 /* Print to STDERR the data_reference DR. */
184 debug_data_reference (struct data_reference *dr)
186 dump_data_reference (stderr, dr);
189 /* Dump function for a DATA_REFERENCE structure. */
192 dump_data_reference (FILE *outf,
193 struct data_reference *dr)
197 fprintf (outf, "#(Data Ref: \n");
198 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
199 fprintf (outf, "# stmt: ");
200 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
201 fprintf (outf, "# ref: ");
202 print_generic_stmt (outf, DR_REF (dr), 0);
203 fprintf (outf, "# base_object: ");
204 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
206 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
208 fprintf (outf, "# Access function %d: ", i);
209 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
211 fprintf (outf, "#)\n");
214 /* Dumps the affine function described by FN to the file OUTF. */
217 dump_affine_function (FILE *outf, affine_fn fn)
222 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
223 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
225 fprintf (outf, " + ");
226 print_generic_expr (outf, coef, TDF_SLIM);
227 fprintf (outf, " * x_%u", i);
231 /* Dumps the conflict function CF to the file OUTF. */
234 dump_conflict_function (FILE *outf, conflict_function *cf)
238 if (cf->n == NO_DEPENDENCE)
239 fprintf (outf, "no dependence\n");
240 else if (cf->n == NOT_KNOWN)
241 fprintf (outf, "not known\n");
244 for (i = 0; i < cf->n; i++)
247 dump_affine_function (outf, cf->fns[i]);
248 fprintf (outf, "]\n");
253 /* Dump function for a SUBSCRIPT structure. */
256 dump_subscript (FILE *outf, struct subscript *subscript)
258 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
260 fprintf (outf, "\n (subscript \n");
261 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
262 dump_conflict_function (outf, cf);
263 if (CF_NONTRIVIAL_P (cf))
265 tree last_iteration = SUB_LAST_CONFLICT (subscript);
266 fprintf (outf, " last_conflict: ");
267 print_generic_stmt (outf, last_iteration, 0);
270 cf = SUB_CONFLICTS_IN_B (subscript);
271 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
272 dump_conflict_function (outf, cf);
273 if (CF_NONTRIVIAL_P (cf))
275 tree last_iteration = SUB_LAST_CONFLICT (subscript);
276 fprintf (outf, " last_conflict: ");
277 print_generic_stmt (outf, last_iteration, 0);
280 fprintf (outf, " (Subscript distance: ");
281 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
282 fprintf (outf, " )\n");
283 fprintf (outf, " )\n");
286 /* Print the classic direction vector DIRV to OUTF. */
289 print_direction_vector (FILE *outf,
295 for (eq = 0; eq < length; eq++)
297 enum data_dependence_direction dir = ((enum data_dependence_direction)
303 fprintf (outf, " +");
306 fprintf (outf, " -");
309 fprintf (outf, " =");
311 case dir_positive_or_equal:
312 fprintf (outf, " +=");
314 case dir_positive_or_negative:
315 fprintf (outf, " +-");
317 case dir_negative_or_equal:
318 fprintf (outf, " -=");
321 fprintf (outf, " *");
324 fprintf (outf, "indep");
328 fprintf (outf, "\n");
331 /* Print a vector of direction vectors. */
334 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
340 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
341 print_direction_vector (outf, v, length);
344 /* Print out a vector VEC of length N to OUTFILE. */
347 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
351 for (i = 0; i < n; i++)
352 fprintf (outfile, "%3d ", vector[i]);
353 fprintf (outfile, "\n");
356 /* Print a vector of distance vectors. */
359 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
365 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
366 print_lambda_vector (outf, v, length);
372 debug_data_dependence_relation (struct data_dependence_relation *ddr)
374 dump_data_dependence_relation (stderr, ddr);
377 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
380 dump_data_dependence_relation (FILE *outf,
381 struct data_dependence_relation *ddr)
383 struct data_reference *dra, *drb;
385 fprintf (outf, "(Data Dep: \n");
387 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
394 dump_data_reference (outf, dra);
396 fprintf (outf, " (nil)\n");
398 dump_data_reference (outf, drb);
400 fprintf (outf, " (nil)\n");
402 fprintf (outf, " (don't know)\n)\n");
408 dump_data_reference (outf, dra);
409 dump_data_reference (outf, drb);
411 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
412 fprintf (outf, " (no dependence)\n");
414 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
419 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
421 fprintf (outf, " access_fn_A: ");
422 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
423 fprintf (outf, " access_fn_B: ");
424 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
425 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
428 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
429 fprintf (outf, " loop nest: (");
430 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
431 fprintf (outf, "%d ", loopi->num);
432 fprintf (outf, ")\n");
434 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
436 fprintf (outf, " distance_vector: ");
437 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
441 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
443 fprintf (outf, " direction_vector: ");
444 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
449 fprintf (outf, ")\n");
452 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
455 dump_data_dependence_direction (FILE *file,
456 enum data_dependence_direction dir)
472 case dir_positive_or_negative:
473 fprintf (file, "+-");
476 case dir_positive_or_equal:
477 fprintf (file, "+=");
480 case dir_negative_or_equal:
481 fprintf (file, "-=");
493 /* Dumps the distance and direction vectors in FILE. DDRS contains
494 the dependence relations, and VECT_SIZE is the size of the
495 dependence vectors, or in other words the number of loops in the
499 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
502 struct data_dependence_relation *ddr;
505 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
506 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
508 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
510 fprintf (file, "DISTANCE_V (");
511 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
512 fprintf (file, ")\n");
515 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
517 fprintf (file, "DIRECTION_V (");
518 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
519 fprintf (file, ")\n");
523 fprintf (file, "\n\n");
526 /* Dumps the data dependence relations DDRS in FILE. */
529 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
532 struct data_dependence_relation *ddr;
534 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
535 dump_data_dependence_relation (file, ddr);
537 fprintf (file, "\n\n");
540 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
541 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
542 constant of type ssizetype, and returns true. If we cannot do this
543 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
547 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
548 tree *var, tree *off)
552 enum tree_code ocode = code;
560 *var = build_int_cst (type, 0);
561 *off = fold_convert (ssizetype, op0);
564 case POINTER_PLUS_EXPR:
569 split_constant_offset (op0, &var0, &off0);
570 split_constant_offset (op1, &var1, &off1);
571 *var = fold_build2 (code, type, var0, var1);
572 *off = size_binop (ocode, off0, off1);
576 if (TREE_CODE (op1) != INTEGER_CST)
579 split_constant_offset (op0, &var0, &off0);
580 *var = fold_build2 (MULT_EXPR, type, var0, op1);
581 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
587 HOST_WIDE_INT pbitsize, pbitpos;
588 enum machine_mode pmode;
589 int punsignedp, pvolatilep;
591 op0 = TREE_OPERAND (op0, 0);
592 if (!handled_component_p (op0))
595 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
596 &pmode, &punsignedp, &pvolatilep, false);
598 if (pbitpos % BITS_PER_UNIT != 0)
600 base = build_fold_addr_expr (base);
601 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
605 split_constant_offset (poffset, &poffset, &off1);
606 off0 = size_binop (PLUS_EXPR, off0, off1);
607 if (POINTER_TYPE_P (TREE_TYPE (base)))
608 base = fold_build_pointer_plus (base, poffset);
610 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
611 fold_convert (TREE_TYPE (base), poffset));
614 var0 = fold_convert (type, base);
616 /* If variable length types are involved, punt, otherwise casts
617 might be converted into ARRAY_REFs in gimplify_conversion.
618 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
619 possibly no longer appears in current GIMPLE, might resurface.
620 This perhaps could run
621 if (CONVERT_EXPR_P (var0))
623 gimplify_conversion (&var0);
624 // Attempt to fill in any within var0 found ARRAY_REF's
625 // element size from corresponding op embedded ARRAY_REF,
626 // if unsuccessful, just punt.
628 while (POINTER_TYPE_P (type))
629 type = TREE_TYPE (type);
630 if (int_size_in_bytes (type) < 0)
640 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
641 enum tree_code subcode;
643 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
646 var0 = gimple_assign_rhs1 (def_stmt);
647 subcode = gimple_assign_rhs_code (def_stmt);
648 var1 = gimple_assign_rhs2 (def_stmt);
650 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
654 /* We must not introduce undefined overflow, and we must not change the value.
655 Hence we're okay if the inner type doesn't overflow to start with
656 (pointer or signed), the outer type also is an integer or pointer
657 and the outer precision is at least as large as the inner. */
658 tree itype = TREE_TYPE (op0);
659 if ((POINTER_TYPE_P (itype)
660 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
661 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
662 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
664 split_constant_offset (op0, &var0, off);
665 *var = fold_convert (type, var0);
676 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
677 will be ssizetype. */
680 split_constant_offset (tree exp, tree *var, tree *off)
682 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
686 *off = ssize_int (0);
689 if (tree_is_chrec (exp)
690 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
693 otype = TREE_TYPE (exp);
694 code = TREE_CODE (exp);
695 extract_ops_from_tree (exp, &code, &op0, &op1);
696 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
698 *var = fold_convert (type, e);
703 /* Returns the address ADDR of an object in a canonical shape (without nop
704 casts, and with type of pointer to the object). */
707 canonicalize_base_object_address (tree addr)
713 /* The base address may be obtained by casting from integer, in that case
715 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
718 if (TREE_CODE (addr) != ADDR_EXPR)
721 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
724 /* Analyzes the behavior of the memory reference DR in the innermost loop or
725 basic block that contains it. Returns true if analysis succeed or false
729 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
731 gimple stmt = DR_STMT (dr);
732 struct loop *loop = loop_containing_stmt (stmt);
733 tree ref = DR_REF (dr);
734 HOST_WIDE_INT pbitsize, pbitpos;
736 enum machine_mode pmode;
737 int punsignedp, pvolatilep;
738 affine_iv base_iv, offset_iv;
739 tree init, dinit, step;
740 bool in_loop = (loop && loop->num);
742 if (dump_file && (dump_flags & TDF_DETAILS))
743 fprintf (dump_file, "analyze_innermost: ");
745 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
746 &pmode, &punsignedp, &pvolatilep, false);
747 gcc_assert (base != NULL_TREE);
749 if (pbitpos % BITS_PER_UNIT != 0)
751 if (dump_file && (dump_flags & TDF_DETAILS))
752 fprintf (dump_file, "failed: bit offset alignment.\n");
756 if (TREE_CODE (base) == MEM_REF)
758 if (!integer_zerop (TREE_OPERAND (base, 1)))
762 double_int moff = mem_ref_offset (base);
763 poffset = double_int_to_tree (sizetype, moff);
766 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
768 base = TREE_OPERAND (base, 0);
771 base = build_fold_addr_expr (base);
775 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
780 if (dump_file && (dump_flags & TDF_DETAILS))
781 fprintf (dump_file, "failed: evolution of base is not"
788 base_iv.step = ssize_int (0);
789 base_iv.no_overflow = true;
796 base_iv.step = ssize_int (0);
797 base_iv.no_overflow = true;
802 offset_iv.base = ssize_int (0);
803 offset_iv.step = ssize_int (0);
809 offset_iv.base = poffset;
810 offset_iv.step = ssize_int (0);
812 else if (!simple_iv (loop, loop_containing_stmt (stmt),
813 poffset, &offset_iv, false))
817 if (dump_file && (dump_flags & TDF_DETAILS))
818 fprintf (dump_file, "failed: evolution of offset is not"
824 offset_iv.base = poffset;
825 offset_iv.step = ssize_int (0);
830 init = ssize_int (pbitpos / BITS_PER_UNIT);
831 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
832 init = size_binop (PLUS_EXPR, init, dinit);
833 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
834 init = size_binop (PLUS_EXPR, init, dinit);
836 step = size_binop (PLUS_EXPR,
837 fold_convert (ssizetype, base_iv.step),
838 fold_convert (ssizetype, offset_iv.step));
840 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
842 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
846 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
848 if (dump_file && (dump_flags & TDF_DETAILS))
849 fprintf (dump_file, "success.\n");
854 /* Determines the base object and the list of indices of memory reference
855 DR, analyzed in LOOP and instantiated in loop nest NEST. */
858 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
860 VEC (tree, heap) *access_fns = NULL;
862 tree base, off, access_fn;
863 basic_block before_loop;
865 /* If analyzing a basic-block there are no indices to analyze
866 and thus no access functions. */
869 DR_BASE_OBJECT (dr) = DR_REF (dr);
870 DR_ACCESS_FNS (dr) = NULL;
874 ref = unshare_expr (DR_REF (dr));
875 before_loop = block_before_loop (nest);
877 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
878 into a two element array with a constant index. The base is
879 then just the immediate underlying object. */
880 if (TREE_CODE (ref) == REALPART_EXPR)
882 ref = TREE_OPERAND (ref, 0);
883 VEC_safe_push (tree, heap, access_fns, integer_zero_node);
885 else if (TREE_CODE (ref) == IMAGPART_EXPR)
887 ref = TREE_OPERAND (ref, 0);
888 VEC_safe_push (tree, heap, access_fns, integer_one_node);
891 /* Analyze access functions of dimensions we know to be independent. */
893 while (handled_component_p (aref))
895 if (TREE_CODE (aref) == ARRAY_REF)
897 op = TREE_OPERAND (aref, 1);
898 access_fn = analyze_scalar_evolution (loop, op);
899 access_fn = instantiate_scev (before_loop, loop, access_fn);
900 VEC_safe_push (tree, heap, access_fns, access_fn);
901 /* For ARRAY_REFs the base is the reference with the index replaced
902 by zero if we can not strip it as the outermost component. */
904 ref = TREE_OPERAND (ref, 0);
906 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
909 aref = TREE_OPERAND (aref, 0);
912 /* If the address operand of a MEM_REF base has an evolution in the
913 analyzed nest, add it as an additional independent access-function. */
914 if (TREE_CODE (aref) == MEM_REF)
916 op = TREE_OPERAND (aref, 0);
917 access_fn = analyze_scalar_evolution (loop, op);
918 access_fn = instantiate_scev (before_loop, loop, access_fn);
919 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
921 base = initial_condition (access_fn);
922 split_constant_offset (base, &base, &off);
923 /* Fold the MEM_REF offset into the evolutions initial
924 value to make more bases comparable. */
925 if (!integer_zerop (TREE_OPERAND (aref, 1)))
927 off = size_binop (PLUS_EXPR, off,
928 fold_convert (ssizetype,
929 TREE_OPERAND (aref, 1)));
930 TREE_OPERAND (aref, 1)
931 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
933 access_fn = chrec_replace_initial_condition
934 (access_fn, fold_convert (TREE_TYPE (base), off));
935 TREE_OPERAND (aref, 0) = base;
936 VEC_safe_push (tree, heap, access_fns, access_fn);
940 DR_BASE_OBJECT (dr) = ref;
941 DR_ACCESS_FNS (dr) = access_fns;
944 /* Extracts the alias analysis information from the memory reference DR. */
947 dr_analyze_alias (struct data_reference *dr)
949 tree ref = DR_REF (dr);
950 tree base = get_base_address (ref), addr;
952 if (INDIRECT_REF_P (base)
953 || TREE_CODE (base) == MEM_REF)
955 addr = TREE_OPERAND (base, 0);
956 if (TREE_CODE (addr) == SSA_NAME)
957 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
961 /* Frees data reference DR. */
964 free_data_ref (data_reference_p dr)
966 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
970 /* Analyzes memory reference MEMREF accessed in STMT. The reference
971 is read if IS_READ is true, write otherwise. Returns the
972 data_reference description of MEMREF. NEST is the outermost loop
973 in which the reference should be instantiated, LOOP is the loop in
974 which the data reference should be analyzed. */
976 struct data_reference *
977 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
980 struct data_reference *dr;
982 if (dump_file && (dump_flags & TDF_DETAILS))
984 fprintf (dump_file, "Creating dr for ");
985 print_generic_expr (dump_file, memref, TDF_SLIM);
986 fprintf (dump_file, "\n");
989 dr = XCNEW (struct data_reference);
991 DR_REF (dr) = memref;
992 DR_IS_READ (dr) = is_read;
994 dr_analyze_innermost (dr, nest);
995 dr_analyze_indices (dr, nest, loop);
996 dr_analyze_alias (dr);
998 if (dump_file && (dump_flags & TDF_DETAILS))
1001 fprintf (dump_file, "\tbase_address: ");
1002 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1003 fprintf (dump_file, "\n\toffset from base address: ");
1004 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1005 fprintf (dump_file, "\n\tconstant offset from base address: ");
1006 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1007 fprintf (dump_file, "\n\tstep: ");
1008 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1009 fprintf (dump_file, "\n\taligned to: ");
1010 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1011 fprintf (dump_file, "\n\tbase_object: ");
1012 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1013 fprintf (dump_file, "\n");
1014 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1016 fprintf (dump_file, "\tAccess function %d: ", i);
1017 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1024 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1027 dr_equal_offsets_p1 (tree offset1, tree offset2)
1031 STRIP_NOPS (offset1);
1032 STRIP_NOPS (offset2);
1034 if (offset1 == offset2)
1037 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1038 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1041 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1042 TREE_OPERAND (offset2, 0));
1044 if (!res || !BINARY_CLASS_P (offset1))
1047 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1048 TREE_OPERAND (offset2, 1));
1053 /* Check if DRA and DRB have equal offsets. */
1055 dr_equal_offsets_p (struct data_reference *dra,
1056 struct data_reference *drb)
1058 tree offset1, offset2;
1060 offset1 = DR_OFFSET (dra);
1061 offset2 = DR_OFFSET (drb);
1063 return dr_equal_offsets_p1 (offset1, offset2);
1066 /* Returns true if FNA == FNB. */
1069 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1071 unsigned i, n = VEC_length (tree, fna);
1073 if (n != VEC_length (tree, fnb))
1076 for (i = 0; i < n; i++)
1077 if (!operand_equal_p (VEC_index (tree, fna, i),
1078 VEC_index (tree, fnb, i), 0))
1084 /* If all the functions in CF are the same, returns one of them,
1085 otherwise returns NULL. */
1088 common_affine_function (conflict_function *cf)
1093 if (!CF_NONTRIVIAL_P (cf))
1098 for (i = 1; i < cf->n; i++)
1099 if (!affine_function_equal_p (comm, cf->fns[i]))
1105 /* Returns the base of the affine function FN. */
1108 affine_function_base (affine_fn fn)
1110 return VEC_index (tree, fn, 0);
1113 /* Returns true if FN is a constant. */
1116 affine_function_constant_p (affine_fn fn)
1121 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1122 if (!integer_zerop (coef))
1128 /* Returns true if FN is the zero constant function. */
1131 affine_function_zero_p (affine_fn fn)
1133 return (integer_zerop (affine_function_base (fn))
1134 && affine_function_constant_p (fn));
1137 /* Returns a signed integer type with the largest precision from TA
1141 signed_type_for_types (tree ta, tree tb)
1143 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1144 return signed_type_for (ta);
1146 return signed_type_for (tb);
1149 /* Applies operation OP on affine functions FNA and FNB, and returns the
1153 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1159 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1161 n = VEC_length (tree, fna);
1162 m = VEC_length (tree, fnb);
1166 n = VEC_length (tree, fnb);
1167 m = VEC_length (tree, fna);
1170 ret = VEC_alloc (tree, heap, m);
1171 for (i = 0; i < n; i++)
1173 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1174 TREE_TYPE (VEC_index (tree, fnb, i)));
1176 VEC_quick_push (tree, ret,
1177 fold_build2 (op, type,
1178 VEC_index (tree, fna, i),
1179 VEC_index (tree, fnb, i)));
1182 for (; VEC_iterate (tree, fna, i, coef); i++)
1183 VEC_quick_push (tree, ret,
1184 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1185 coef, integer_zero_node));
1186 for (; VEC_iterate (tree, fnb, i, coef); i++)
1187 VEC_quick_push (tree, ret,
1188 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1189 integer_zero_node, coef));
1194 /* Returns the sum of affine functions FNA and FNB. */
1197 affine_fn_plus (affine_fn fna, affine_fn fnb)
1199 return affine_fn_op (PLUS_EXPR, fna, fnb);
1202 /* Returns the difference of affine functions FNA and FNB. */
1205 affine_fn_minus (affine_fn fna, affine_fn fnb)
1207 return affine_fn_op (MINUS_EXPR, fna, fnb);
1210 /* Frees affine function FN. */
1213 affine_fn_free (affine_fn fn)
1215 VEC_free (tree, heap, fn);
1218 /* Determine for each subscript in the data dependence relation DDR
1222 compute_subscript_distance (struct data_dependence_relation *ddr)
1224 conflict_function *cf_a, *cf_b;
1225 affine_fn fn_a, fn_b, diff;
1227 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1231 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1233 struct subscript *subscript;
1235 subscript = DDR_SUBSCRIPT (ddr, i);
1236 cf_a = SUB_CONFLICTS_IN_A (subscript);
1237 cf_b = SUB_CONFLICTS_IN_B (subscript);
1239 fn_a = common_affine_function (cf_a);
1240 fn_b = common_affine_function (cf_b);
1243 SUB_DISTANCE (subscript) = chrec_dont_know;
1246 diff = affine_fn_minus (fn_a, fn_b);
1248 if (affine_function_constant_p (diff))
1249 SUB_DISTANCE (subscript) = affine_function_base (diff);
1251 SUB_DISTANCE (subscript) = chrec_dont_know;
1253 affine_fn_free (diff);
1258 /* Returns the conflict function for "unknown". */
1260 static conflict_function *
1261 conflict_fn_not_known (void)
1263 conflict_function *fn = XCNEW (conflict_function);
1269 /* Returns the conflict function for "independent". */
1271 static conflict_function *
1272 conflict_fn_no_dependence (void)
1274 conflict_function *fn = XCNEW (conflict_function);
1275 fn->n = NO_DEPENDENCE;
1280 /* Returns true if the address of OBJ is invariant in LOOP. */
1283 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1285 while (handled_component_p (obj))
1287 if (TREE_CODE (obj) == ARRAY_REF)
1289 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1290 need to check the stride and the lower bound of the reference. */
1291 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1293 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1297 else if (TREE_CODE (obj) == COMPONENT_REF)
1299 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1303 obj = TREE_OPERAND (obj, 0);
1306 if (!INDIRECT_REF_P (obj)
1307 && TREE_CODE (obj) != MEM_REF)
1310 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1314 /* Returns false if we can prove that data references A and B do not alias,
1315 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1319 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1322 tree addr_a = DR_BASE_OBJECT (a);
1323 tree addr_b = DR_BASE_OBJECT (b);
1325 /* If we are not processing a loop nest but scalar code we
1326 do not need to care about possible cross-iteration dependences
1327 and thus can process the full original reference. Do so,
1328 similar to how loop invariant motion applies extra offset-based
1332 aff_tree off1, off2;
1333 double_int size1, size2;
1334 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1335 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1336 aff_combination_scale (&off1, double_int_minus_one);
1337 aff_combination_add (&off2, &off1);
1338 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1342 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1343 return refs_output_dependent_p (addr_a, addr_b);
1344 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1345 return refs_anti_dependent_p (addr_a, addr_b);
1346 return refs_may_alias_p (addr_a, addr_b);
1349 static void compute_self_dependence (struct data_dependence_relation *);
1351 /* Initialize a data dependence relation between data accesses A and
1352 B. NB_LOOPS is the number of loops surrounding the references: the
1353 size of the classic distance/direction vectors. */
1355 static struct data_dependence_relation *
1356 initialize_data_dependence_relation (struct data_reference *a,
1357 struct data_reference *b,
1358 VEC (loop_p, heap) *loop_nest)
1360 struct data_dependence_relation *res;
1363 res = XNEW (struct data_dependence_relation);
1366 DDR_LOOP_NEST (res) = NULL;
1367 DDR_REVERSED_P (res) = false;
1368 DDR_SUBSCRIPTS (res) = NULL;
1369 DDR_DIR_VECTS (res) = NULL;
1370 DDR_DIST_VECTS (res) = NULL;
1372 if (a == NULL || b == NULL)
1374 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1378 /* If the data references do not alias, then they are independent. */
1379 if (!dr_may_alias_p (a, b, loop_nest != NULL))
1381 DDR_ARE_DEPENDENT (res) = chrec_known;
1385 /* When the references are exactly the same, don't spend time doing
1386 the data dependence tests, just initialize the ddr and return. */
1387 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1389 DDR_AFFINE_P (res) = true;
1390 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1391 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1392 DDR_LOOP_NEST (res) = loop_nest;
1393 DDR_INNER_LOOP (res) = 0;
1394 DDR_SELF_REFERENCE (res) = true;
1395 compute_self_dependence (res);
1399 /* If the references do not access the same object, we do not know
1400 whether they alias or not. */
1401 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1403 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1407 /* If the base of the object is not invariant in the loop nest, we cannot
1408 analyze it. TODO -- in fact, it would suffice to record that there may
1409 be arbitrary dependences in the loops where the base object varies. */
1411 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1412 DR_BASE_OBJECT (a)))
1414 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1418 /* If the number of dimensions of the access to not agree we can have
1419 a pointer access to a component of the array element type and an
1420 array access while the base-objects are still the same. Punt. */
1421 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1423 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1427 DDR_AFFINE_P (res) = true;
1428 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1429 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1430 DDR_LOOP_NEST (res) = loop_nest;
1431 DDR_INNER_LOOP (res) = 0;
1432 DDR_SELF_REFERENCE (res) = false;
1434 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1436 struct subscript *subscript;
1438 subscript = XNEW (struct subscript);
1439 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1440 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1441 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1442 SUB_DISTANCE (subscript) = chrec_dont_know;
1443 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1449 /* Frees memory used by the conflict function F. */
1452 free_conflict_function (conflict_function *f)
1456 if (CF_NONTRIVIAL_P (f))
1458 for (i = 0; i < f->n; i++)
1459 affine_fn_free (f->fns[i]);
1464 /* Frees memory used by SUBSCRIPTS. */
1467 free_subscripts (VEC (subscript_p, heap) *subscripts)
1472 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1474 free_conflict_function (s->conflicting_iterations_in_a);
1475 free_conflict_function (s->conflicting_iterations_in_b);
1478 VEC_free (subscript_p, heap, subscripts);
1481 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1485 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1488 if (dump_file && (dump_flags & TDF_DETAILS))
1490 fprintf (dump_file, "(dependence classified: ");
1491 print_generic_expr (dump_file, chrec, 0);
1492 fprintf (dump_file, ")\n");
1495 DDR_ARE_DEPENDENT (ddr) = chrec;
1496 free_subscripts (DDR_SUBSCRIPTS (ddr));
1497 DDR_SUBSCRIPTS (ddr) = NULL;
1500 /* The dependence relation DDR cannot be represented by a distance
1504 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1506 if (dump_file && (dump_flags & TDF_DETAILS))
1507 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1509 DDR_AFFINE_P (ddr) = false;
1514 /* This section contains the classic Banerjee tests. */
1516 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1517 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1520 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1522 return (evolution_function_is_constant_p (chrec_a)
1523 && evolution_function_is_constant_p (chrec_b));
1526 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1527 variable, i.e., if the SIV (Single Index Variable) test is true. */
1530 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1532 if ((evolution_function_is_constant_p (chrec_a)
1533 && evolution_function_is_univariate_p (chrec_b))
1534 || (evolution_function_is_constant_p (chrec_b)
1535 && evolution_function_is_univariate_p (chrec_a)))
1538 if (evolution_function_is_univariate_p (chrec_a)
1539 && evolution_function_is_univariate_p (chrec_b))
1541 switch (TREE_CODE (chrec_a))
1543 case POLYNOMIAL_CHREC:
1544 switch (TREE_CODE (chrec_b))
1546 case POLYNOMIAL_CHREC:
1547 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1562 /* Creates a conflict function with N dimensions. The affine functions
1563 in each dimension follow. */
1565 static conflict_function *
1566 conflict_fn (unsigned n, ...)
1569 conflict_function *ret = XCNEW (conflict_function);
1572 gcc_assert (0 < n && n <= MAX_DIM);
1576 for (i = 0; i < n; i++)
1577 ret->fns[i] = va_arg (ap, affine_fn);
1583 /* Returns constant affine function with value CST. */
1586 affine_fn_cst (tree cst)
1588 affine_fn fn = VEC_alloc (tree, heap, 1);
1589 VEC_quick_push (tree, fn, cst);
1593 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1596 affine_fn_univar (tree cst, unsigned dim, tree coef)
1598 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1601 gcc_assert (dim > 0);
1602 VEC_quick_push (tree, fn, cst);
1603 for (i = 1; i < dim; i++)
1604 VEC_quick_push (tree, fn, integer_zero_node);
1605 VEC_quick_push (tree, fn, coef);
1609 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1610 *OVERLAPS_B are initialized to the functions that describe the
1611 relation between the elements accessed twice by CHREC_A and
1612 CHREC_B. For k >= 0, the following property is verified:
1614 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1617 analyze_ziv_subscript (tree chrec_a,
1619 conflict_function **overlaps_a,
1620 conflict_function **overlaps_b,
1621 tree *last_conflicts)
1623 tree type, difference;
1624 dependence_stats.num_ziv++;
1626 if (dump_file && (dump_flags & TDF_DETAILS))
1627 fprintf (dump_file, "(analyze_ziv_subscript \n");
1629 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1630 chrec_a = chrec_convert (type, chrec_a, NULL);
1631 chrec_b = chrec_convert (type, chrec_b, NULL);
1632 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1634 switch (TREE_CODE (difference))
1637 if (integer_zerop (difference))
1639 /* The difference is equal to zero: the accessed index
1640 overlaps for each iteration in the loop. */
1641 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1642 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1643 *last_conflicts = chrec_dont_know;
1644 dependence_stats.num_ziv_dependent++;
1648 /* The accesses do not overlap. */
1649 *overlaps_a = conflict_fn_no_dependence ();
1650 *overlaps_b = conflict_fn_no_dependence ();
1651 *last_conflicts = integer_zero_node;
1652 dependence_stats.num_ziv_independent++;
1657 /* We're not sure whether the indexes overlap. For the moment,
1658 conservatively answer "don't know". */
1659 if (dump_file && (dump_flags & TDF_DETAILS))
1660 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1662 *overlaps_a = conflict_fn_not_known ();
1663 *overlaps_b = conflict_fn_not_known ();
1664 *last_conflicts = chrec_dont_know;
1665 dependence_stats.num_ziv_unimplemented++;
1669 if (dump_file && (dump_flags & TDF_DETAILS))
1670 fprintf (dump_file, ")\n");
1673 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1674 and only if it fits to the int type. If this is not the case, or the
1675 bound on the number of iterations of LOOP could not be derived, returns
1679 max_stmt_executions_tree (struct loop *loop)
1683 if (!max_stmt_executions (loop, true, &nit))
1684 return chrec_dont_know;
1686 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1687 return chrec_dont_know;
1689 return double_int_to_tree (unsigned_type_node, nit);
1692 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1693 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1694 *OVERLAPS_B are initialized to the functions that describe the
1695 relation between the elements accessed twice by CHREC_A and
1696 CHREC_B. For k >= 0, the following property is verified:
1698 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1701 analyze_siv_subscript_cst_affine (tree chrec_a,
1703 conflict_function **overlaps_a,
1704 conflict_function **overlaps_b,
1705 tree *last_conflicts)
1707 bool value0, value1, value2;
1708 tree type, difference, tmp;
1710 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1711 chrec_a = chrec_convert (type, chrec_a, NULL);
1712 chrec_b = chrec_convert (type, chrec_b, NULL);
1713 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1715 if (!chrec_is_positive (initial_condition (difference), &value0))
1717 if (dump_file && (dump_flags & TDF_DETAILS))
1718 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1720 dependence_stats.num_siv_unimplemented++;
1721 *overlaps_a = conflict_fn_not_known ();
1722 *overlaps_b = conflict_fn_not_known ();
1723 *last_conflicts = chrec_dont_know;
1728 if (value0 == false)
1730 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1732 if (dump_file && (dump_flags & TDF_DETAILS))
1733 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1735 *overlaps_a = conflict_fn_not_known ();
1736 *overlaps_b = conflict_fn_not_known ();
1737 *last_conflicts = chrec_dont_know;
1738 dependence_stats.num_siv_unimplemented++;
1747 chrec_b = {10, +, 1}
1750 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1752 HOST_WIDE_INT numiter;
1753 struct loop *loop = get_chrec_loop (chrec_b);
1755 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1756 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1757 fold_build1 (ABS_EXPR, type, difference),
1758 CHREC_RIGHT (chrec_b));
1759 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1760 *last_conflicts = integer_one_node;
1763 /* Perform weak-zero siv test to see if overlap is
1764 outside the loop bounds. */
1765 numiter = max_stmt_executions_int (loop, true);
1768 && compare_tree_int (tmp, numiter) > 0)
1770 free_conflict_function (*overlaps_a);
1771 free_conflict_function (*overlaps_b);
1772 *overlaps_a = conflict_fn_no_dependence ();
1773 *overlaps_b = conflict_fn_no_dependence ();
1774 *last_conflicts = integer_zero_node;
1775 dependence_stats.num_siv_independent++;
1778 dependence_stats.num_siv_dependent++;
1782 /* When the step does not divide the difference, there are
1786 *overlaps_a = conflict_fn_no_dependence ();
1787 *overlaps_b = conflict_fn_no_dependence ();
1788 *last_conflicts = integer_zero_node;
1789 dependence_stats.num_siv_independent++;
1798 chrec_b = {10, +, -1}
1800 In this case, chrec_a will not overlap with chrec_b. */
1801 *overlaps_a = conflict_fn_no_dependence ();
1802 *overlaps_b = conflict_fn_no_dependence ();
1803 *last_conflicts = integer_zero_node;
1804 dependence_stats.num_siv_independent++;
1811 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1813 if (dump_file && (dump_flags & TDF_DETAILS))
1814 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1816 *overlaps_a = conflict_fn_not_known ();
1817 *overlaps_b = conflict_fn_not_known ();
1818 *last_conflicts = chrec_dont_know;
1819 dependence_stats.num_siv_unimplemented++;
1824 if (value2 == false)
1828 chrec_b = {10, +, -1}
1830 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1832 HOST_WIDE_INT numiter;
1833 struct loop *loop = get_chrec_loop (chrec_b);
1835 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1836 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1837 CHREC_RIGHT (chrec_b));
1838 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1839 *last_conflicts = integer_one_node;
1841 /* Perform weak-zero siv test to see if overlap is
1842 outside the loop bounds. */
1843 numiter = max_stmt_executions_int (loop, true);
1846 && compare_tree_int (tmp, numiter) > 0)
1848 free_conflict_function (*overlaps_a);
1849 free_conflict_function (*overlaps_b);
1850 *overlaps_a = conflict_fn_no_dependence ();
1851 *overlaps_b = conflict_fn_no_dependence ();
1852 *last_conflicts = integer_zero_node;
1853 dependence_stats.num_siv_independent++;
1856 dependence_stats.num_siv_dependent++;
1860 /* When the step does not divide the difference, there
1864 *overlaps_a = conflict_fn_no_dependence ();
1865 *overlaps_b = conflict_fn_no_dependence ();
1866 *last_conflicts = integer_zero_node;
1867 dependence_stats.num_siv_independent++;
1877 In this case, chrec_a will not overlap with chrec_b. */
1878 *overlaps_a = conflict_fn_no_dependence ();
1879 *overlaps_b = conflict_fn_no_dependence ();
1880 *last_conflicts = integer_zero_node;
1881 dependence_stats.num_siv_independent++;
1889 /* Helper recursive function for initializing the matrix A. Returns
1890 the initial value of CHREC. */
1893 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1897 switch (TREE_CODE (chrec))
1899 case POLYNOMIAL_CHREC:
1900 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1902 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1903 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1909 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1910 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1912 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1917 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1918 return chrec_convert (chrec_type (chrec), op, NULL);
1923 /* Handle ~X as -1 - X. */
1924 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1925 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1926 build_int_cst (TREE_TYPE (chrec), -1), op);
1938 #define FLOOR_DIV(x,y) ((x) / (y))
1940 /* Solves the special case of the Diophantine equation:
1941 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1943 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1944 number of iterations that loops X and Y run. The overlaps will be
1945 constructed as evolutions in dimension DIM. */
1948 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1949 affine_fn *overlaps_a,
1950 affine_fn *overlaps_b,
1951 tree *last_conflicts, int dim)
1953 if (((step_a > 0 && step_b > 0)
1954 || (step_a < 0 && step_b < 0)))
1956 int step_overlaps_a, step_overlaps_b;
1957 int gcd_steps_a_b, last_conflict, tau2;
1959 gcd_steps_a_b = gcd (step_a, step_b);
1960 step_overlaps_a = step_b / gcd_steps_a_b;
1961 step_overlaps_b = step_a / gcd_steps_a_b;
1965 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1966 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1967 last_conflict = tau2;
1968 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1971 *last_conflicts = chrec_dont_know;
1973 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1974 build_int_cst (NULL_TREE,
1976 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1977 build_int_cst (NULL_TREE,
1983 *overlaps_a = affine_fn_cst (integer_zero_node);
1984 *overlaps_b = affine_fn_cst (integer_zero_node);
1985 *last_conflicts = integer_zero_node;
1989 /* Solves the special case of a Diophantine equation where CHREC_A is
1990 an affine bivariate function, and CHREC_B is an affine univariate
1991 function. For example,
1993 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1995 has the following overlapping functions:
1997 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1998 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1999 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2001 FORNOW: This is a specialized implementation for a case occurring in
2002 a common benchmark. Implement the general algorithm. */
2005 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2006 conflict_function **overlaps_a,
2007 conflict_function **overlaps_b,
2008 tree *last_conflicts)
2010 bool xz_p, yz_p, xyz_p;
2011 int step_x, step_y, step_z;
2012 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2013 affine_fn overlaps_a_xz, overlaps_b_xz;
2014 affine_fn overlaps_a_yz, overlaps_b_yz;
2015 affine_fn overlaps_a_xyz, overlaps_b_xyz;
2016 affine_fn ova1, ova2, ovb;
2017 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2019 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2020 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2021 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2024 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
2025 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2026 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2028 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2030 if (dump_file && (dump_flags & TDF_DETAILS))
2031 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2033 *overlaps_a = conflict_fn_not_known ();
2034 *overlaps_b = conflict_fn_not_known ();
2035 *last_conflicts = chrec_dont_know;
2039 niter = MIN (niter_x, niter_z);
2040 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2043 &last_conflicts_xz, 1);
2044 niter = MIN (niter_y, niter_z);
2045 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2048 &last_conflicts_yz, 2);
2049 niter = MIN (niter_x, niter_z);
2050 niter = MIN (niter_y, niter);
2051 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2054 &last_conflicts_xyz, 3);
2056 xz_p = !integer_zerop (last_conflicts_xz);
2057 yz_p = !integer_zerop (last_conflicts_yz);
2058 xyz_p = !integer_zerop (last_conflicts_xyz);
2060 if (xz_p || yz_p || xyz_p)
2062 ova1 = affine_fn_cst (integer_zero_node);
2063 ova2 = affine_fn_cst (integer_zero_node);
2064 ovb = affine_fn_cst (integer_zero_node);
2067 affine_fn t0 = ova1;
2070 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2071 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2072 affine_fn_free (t0);
2073 affine_fn_free (t2);
2074 *last_conflicts = last_conflicts_xz;
2078 affine_fn t0 = ova2;
2081 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2082 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2083 affine_fn_free (t0);
2084 affine_fn_free (t2);
2085 *last_conflicts = last_conflicts_yz;
2089 affine_fn t0 = ova1;
2090 affine_fn t2 = ova2;
2093 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2094 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2095 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2096 affine_fn_free (t0);
2097 affine_fn_free (t2);
2098 affine_fn_free (t4);
2099 *last_conflicts = last_conflicts_xyz;
2101 *overlaps_a = conflict_fn (2, ova1, ova2);
2102 *overlaps_b = conflict_fn (1, ovb);
2106 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2107 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2108 *last_conflicts = integer_zero_node;
2111 affine_fn_free (overlaps_a_xz);
2112 affine_fn_free (overlaps_b_xz);
2113 affine_fn_free (overlaps_a_yz);
2114 affine_fn_free (overlaps_b_yz);
2115 affine_fn_free (overlaps_a_xyz);
2116 affine_fn_free (overlaps_b_xyz);
2119 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2122 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2125 memcpy (vec2, vec1, size * sizeof (*vec1));
2128 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2131 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2136 for (i = 0; i < m; i++)
2137 lambda_vector_copy (mat1[i], mat2[i], n);
2140 /* Store the N x N identity matrix in MAT. */
2143 lambda_matrix_id (lambda_matrix mat, int size)
2147 for (i = 0; i < size; i++)
2148 for (j = 0; j < size; j++)
2149 mat[i][j] = (i == j) ? 1 : 0;
2152 /* Return the first nonzero element of vector VEC1 between START and N.
2153 We must have START <= N. Returns N if VEC1 is the zero vector. */
2156 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2159 while (j < n && vec1[j] == 0)
2164 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2165 R2 = R2 + CONST1 * R1. */
2168 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2175 for (i = 0; i < n; i++)
2176 mat[r2][i] += const1 * mat[r1][i];
2179 /* Swap rows R1 and R2 in matrix MAT. */
2182 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2191 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2192 and store the result in VEC2. */
2195 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2196 int size, int const1)
2201 lambda_vector_clear (vec2, size);
2203 for (i = 0; i < size; i++)
2204 vec2[i] = const1 * vec1[i];
2207 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2210 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2213 lambda_vector_mult_const (vec1, vec2, size, -1);
2216 /* Negate row R1 of matrix MAT which has N columns. */
2219 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2221 lambda_vector_negate (mat[r1], mat[r1], n);
2224 /* Return true if two vectors are equal. */
2227 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2230 for (i = 0; i < size; i++)
2231 if (vec1[i] != vec2[i])
2236 /* Given an M x N integer matrix A, this function determines an M x
2237 M unimodular matrix U, and an M x N echelon matrix S such that
2238 "U.A = S". This decomposition is also known as "right Hermite".
2240 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2241 Restructuring Compilers" Utpal Banerjee. */
2244 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2245 lambda_matrix S, lambda_matrix U)
2249 lambda_matrix_copy (A, S, m, n);
2250 lambda_matrix_id (U, m);
2252 for (j = 0; j < n; j++)
2254 if (lambda_vector_first_nz (S[j], m, i0) < m)
2257 for (i = m - 1; i >= i0; i--)
2259 while (S[i][j] != 0)
2261 int sigma, factor, a, b;
2265 sigma = (a * b < 0) ? -1: 1;
2268 factor = sigma * (a / b);
2270 lambda_matrix_row_add (S, n, i, i-1, -factor);
2271 lambda_matrix_row_exchange (S, i, i-1);
2273 lambda_matrix_row_add (U, m, i, i-1, -factor);
2274 lambda_matrix_row_exchange (U, i, i-1);
2281 /* Determines the overlapping elements due to accesses CHREC_A and
2282 CHREC_B, that are affine functions. This function cannot handle
2283 symbolic evolution functions, ie. when initial conditions are
2284 parameters, because it uses lambda matrices of integers. */
2287 analyze_subscript_affine_affine (tree chrec_a,
2289 conflict_function **overlaps_a,
2290 conflict_function **overlaps_b,
2291 tree *last_conflicts)
2293 unsigned nb_vars_a, nb_vars_b, dim;
2294 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2295 lambda_matrix A, U, S;
2296 struct obstack scratch_obstack;
2298 if (eq_evolutions_p (chrec_a, chrec_b))
2300 /* The accessed index overlaps for each iteration in the
2302 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2303 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2304 *last_conflicts = chrec_dont_know;
2307 if (dump_file && (dump_flags & TDF_DETAILS))
2308 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2310 /* For determining the initial intersection, we have to solve a
2311 Diophantine equation. This is the most time consuming part.
2313 For answering to the question: "Is there a dependence?" we have
2314 to prove that there exists a solution to the Diophantine
2315 equation, and that the solution is in the iteration domain,
2316 i.e. the solution is positive or zero, and that the solution
2317 happens before the upper bound loop.nb_iterations. Otherwise
2318 there is no dependence. This function outputs a description of
2319 the iterations that hold the intersections. */
2321 nb_vars_a = nb_vars_in_chrec (chrec_a);
2322 nb_vars_b = nb_vars_in_chrec (chrec_b);
2324 gcc_obstack_init (&scratch_obstack);
2326 dim = nb_vars_a + nb_vars_b;
2327 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2328 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2329 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2331 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2332 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2333 gamma = init_b - init_a;
2335 /* Don't do all the hard work of solving the Diophantine equation
2336 when we already know the solution: for example,
2339 | gamma = 3 - 3 = 0.
2340 Then the first overlap occurs during the first iterations:
2341 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2345 if (nb_vars_a == 1 && nb_vars_b == 1)
2347 HOST_WIDE_INT step_a, step_b;
2348 HOST_WIDE_INT niter, niter_a, niter_b;
2351 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2352 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2353 niter = MIN (niter_a, niter_b);
2354 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2355 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2357 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2360 *overlaps_a = conflict_fn (1, ova);
2361 *overlaps_b = conflict_fn (1, ovb);
2364 else if (nb_vars_a == 2 && nb_vars_b == 1)
2365 compute_overlap_steps_for_affine_1_2
2366 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2368 else if (nb_vars_a == 1 && nb_vars_b == 2)
2369 compute_overlap_steps_for_affine_1_2
2370 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2374 if (dump_file && (dump_flags & TDF_DETAILS))
2375 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2376 *overlaps_a = conflict_fn_not_known ();
2377 *overlaps_b = conflict_fn_not_known ();
2378 *last_conflicts = chrec_dont_know;
2380 goto end_analyze_subs_aa;
2384 lambda_matrix_right_hermite (A, dim, 1, S, U);
2389 lambda_matrix_row_negate (U, dim, 0);
2391 gcd_alpha_beta = S[0][0];
2393 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2394 but that is a quite strange case. Instead of ICEing, answer
2396 if (gcd_alpha_beta == 0)
2398 *overlaps_a = conflict_fn_not_known ();
2399 *overlaps_b = conflict_fn_not_known ();
2400 *last_conflicts = chrec_dont_know;
2401 goto end_analyze_subs_aa;
2404 /* The classic "gcd-test". */
2405 if (!int_divides_p (gcd_alpha_beta, gamma))
2407 /* The "gcd-test" has determined that there is no integer
2408 solution, i.e. there is no dependence. */
2409 *overlaps_a = conflict_fn_no_dependence ();
2410 *overlaps_b = conflict_fn_no_dependence ();
2411 *last_conflicts = integer_zero_node;
2414 /* Both access functions are univariate. This includes SIV and MIV cases. */
2415 else if (nb_vars_a == 1 && nb_vars_b == 1)
2417 /* Both functions should have the same evolution sign. */
2418 if (((A[0][0] > 0 && -A[1][0] > 0)
2419 || (A[0][0] < 0 && -A[1][0] < 0)))
2421 /* The solutions are given by:
2423 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2426 For a given integer t. Using the following variables,
2428 | i0 = u11 * gamma / gcd_alpha_beta
2429 | j0 = u12 * gamma / gcd_alpha_beta
2436 | y0 = j0 + j1 * t. */
2437 HOST_WIDE_INT i0, j0, i1, j1;
2439 i0 = U[0][0] * gamma / gcd_alpha_beta;
2440 j0 = U[0][1] * gamma / gcd_alpha_beta;
2444 if ((i1 == 0 && i0 < 0)
2445 || (j1 == 0 && j0 < 0))
2447 /* There is no solution.
2448 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2449 falls in here, but for the moment we don't look at the
2450 upper bound of the iteration domain. */
2451 *overlaps_a = conflict_fn_no_dependence ();
2452 *overlaps_b = conflict_fn_no_dependence ();
2453 *last_conflicts = integer_zero_node;
2454 goto end_analyze_subs_aa;
2457 if (i1 > 0 && j1 > 0)
2459 HOST_WIDE_INT niter_a = max_stmt_executions_int
2460 (get_chrec_loop (chrec_a), true);
2461 HOST_WIDE_INT niter_b = max_stmt_executions_int
2462 (get_chrec_loop (chrec_b), true);
2463 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2465 /* (X0, Y0) is a solution of the Diophantine equation:
2466 "chrec_a (X0) = chrec_b (Y0)". */
2467 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2469 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2470 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2472 /* (X1, Y1) is the smallest positive solution of the eq
2473 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2474 first conflict occurs. */
2475 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2476 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2477 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2481 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2482 FLOOR_DIV (niter - j0, j1));
2483 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2485 /* If the overlap occurs outside of the bounds of the
2486 loop, there is no dependence. */
2487 if (x1 >= niter || y1 >= niter)
2489 *overlaps_a = conflict_fn_no_dependence ();
2490 *overlaps_b = conflict_fn_no_dependence ();
2491 *last_conflicts = integer_zero_node;
2492 goto end_analyze_subs_aa;
2495 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2498 *last_conflicts = chrec_dont_know;
2502 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2504 build_int_cst (NULL_TREE, i1)));
2507 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2509 build_int_cst (NULL_TREE, j1)));
2513 /* FIXME: For the moment, the upper bound of the
2514 iteration domain for i and j is not checked. */
2515 if (dump_file && (dump_flags & TDF_DETAILS))
2516 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2517 *overlaps_a = conflict_fn_not_known ();
2518 *overlaps_b = conflict_fn_not_known ();
2519 *last_conflicts = chrec_dont_know;
2524 if (dump_file && (dump_flags & TDF_DETAILS))
2525 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2526 *overlaps_a = conflict_fn_not_known ();
2527 *overlaps_b = conflict_fn_not_known ();
2528 *last_conflicts = chrec_dont_know;
2533 if (dump_file && (dump_flags & TDF_DETAILS))
2534 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2535 *overlaps_a = conflict_fn_not_known ();
2536 *overlaps_b = conflict_fn_not_known ();
2537 *last_conflicts = chrec_dont_know;
2540 end_analyze_subs_aa:
2541 obstack_free (&scratch_obstack, NULL);
2542 if (dump_file && (dump_flags & TDF_DETAILS))
2544 fprintf (dump_file, " (overlaps_a = ");
2545 dump_conflict_function (dump_file, *overlaps_a);
2546 fprintf (dump_file, ")\n (overlaps_b = ");
2547 dump_conflict_function (dump_file, *overlaps_b);
2548 fprintf (dump_file, ")\n");
2549 fprintf (dump_file, ")\n");
2553 /* Returns true when analyze_subscript_affine_affine can be used for
2554 determining the dependence relation between chrec_a and chrec_b,
2555 that contain symbols. This function modifies chrec_a and chrec_b
2556 such that the analysis result is the same, and such that they don't
2557 contain symbols, and then can safely be passed to the analyzer.
2559 Example: The analysis of the following tuples of evolutions produce
2560 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2563 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2564 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2568 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2570 tree diff, type, left_a, left_b, right_b;
2572 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2573 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2574 /* FIXME: For the moment not handled. Might be refined later. */
2577 type = chrec_type (*chrec_a);
2578 left_a = CHREC_LEFT (*chrec_a);
2579 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2580 diff = chrec_fold_minus (type, left_a, left_b);
2582 if (!evolution_function_is_constant_p (diff))
2585 if (dump_file && (dump_flags & TDF_DETAILS))
2586 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2588 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2589 diff, CHREC_RIGHT (*chrec_a));
2590 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2591 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2592 build_int_cst (type, 0),
2597 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2598 *OVERLAPS_B are initialized to the functions that describe the
2599 relation between the elements accessed twice by CHREC_A and
2600 CHREC_B. For k >= 0, the following property is verified:
2602 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2605 analyze_siv_subscript (tree chrec_a,
2607 conflict_function **overlaps_a,
2608 conflict_function **overlaps_b,
2609 tree *last_conflicts,
2612 dependence_stats.num_siv++;
2614 if (dump_file && (dump_flags & TDF_DETAILS))
2615 fprintf (dump_file, "(analyze_siv_subscript \n");
2617 if (evolution_function_is_constant_p (chrec_a)
2618 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2619 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2620 overlaps_a, overlaps_b, last_conflicts);
2622 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2623 && evolution_function_is_constant_p (chrec_b))
2624 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2625 overlaps_b, overlaps_a, last_conflicts);
2627 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2628 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2630 if (!chrec_contains_symbols (chrec_a)
2631 && !chrec_contains_symbols (chrec_b))
2633 analyze_subscript_affine_affine (chrec_a, chrec_b,
2634 overlaps_a, overlaps_b,
2637 if (CF_NOT_KNOWN_P (*overlaps_a)
2638 || CF_NOT_KNOWN_P (*overlaps_b))
2639 dependence_stats.num_siv_unimplemented++;
2640 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2641 || CF_NO_DEPENDENCE_P (*overlaps_b))
2642 dependence_stats.num_siv_independent++;
2644 dependence_stats.num_siv_dependent++;
2646 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2649 analyze_subscript_affine_affine (chrec_a, chrec_b,
2650 overlaps_a, overlaps_b,
2653 if (CF_NOT_KNOWN_P (*overlaps_a)
2654 || CF_NOT_KNOWN_P (*overlaps_b))
2655 dependence_stats.num_siv_unimplemented++;
2656 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2657 || CF_NO_DEPENDENCE_P (*overlaps_b))
2658 dependence_stats.num_siv_independent++;
2660 dependence_stats.num_siv_dependent++;
2663 goto siv_subscript_dontknow;
2668 siv_subscript_dontknow:;
2669 if (dump_file && (dump_flags & TDF_DETAILS))
2670 fprintf (dump_file, "siv test failed: unimplemented.\n");
2671 *overlaps_a = conflict_fn_not_known ();
2672 *overlaps_b = conflict_fn_not_known ();
2673 *last_conflicts = chrec_dont_know;
2674 dependence_stats.num_siv_unimplemented++;
2677 if (dump_file && (dump_flags & TDF_DETAILS))
2678 fprintf (dump_file, ")\n");
2681 /* Returns false if we can prove that the greatest common divisor of the steps
2682 of CHREC does not divide CST, false otherwise. */
2685 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2687 HOST_WIDE_INT cd = 0, val;
2690 if (!host_integerp (cst, 0))
2692 val = tree_low_cst (cst, 0);
2694 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2696 step = CHREC_RIGHT (chrec);
2697 if (!host_integerp (step, 0))
2699 cd = gcd (cd, tree_low_cst (step, 0));
2700 chrec = CHREC_LEFT (chrec);
2703 return val % cd == 0;
2706 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2707 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2708 functions that describe the relation between the elements accessed
2709 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2712 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2715 analyze_miv_subscript (tree chrec_a,
2717 conflict_function **overlaps_a,
2718 conflict_function **overlaps_b,
2719 tree *last_conflicts,
2720 struct loop *loop_nest)
2722 tree type, difference;
2724 dependence_stats.num_miv++;
2725 if (dump_file && (dump_flags & TDF_DETAILS))
2726 fprintf (dump_file, "(analyze_miv_subscript \n");
2728 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2729 chrec_a = chrec_convert (type, chrec_a, NULL);
2730 chrec_b = chrec_convert (type, chrec_b, NULL);
2731 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2733 if (eq_evolutions_p (chrec_a, chrec_b))
2735 /* Access functions are the same: all the elements are accessed
2736 in the same order. */
2737 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2738 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2739 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2740 dependence_stats.num_miv_dependent++;
2743 else if (evolution_function_is_constant_p (difference)
2744 /* For the moment, the following is verified:
2745 evolution_function_is_affine_multivariate_p (chrec_a,
2747 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2749 /* testsuite/.../ssa-chrec-33.c
2750 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2752 The difference is 1, and all the evolution steps are multiples
2753 of 2, consequently there are no overlapping elements. */
2754 *overlaps_a = conflict_fn_no_dependence ();
2755 *overlaps_b = conflict_fn_no_dependence ();
2756 *last_conflicts = integer_zero_node;
2757 dependence_stats.num_miv_independent++;
2760 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2761 && !chrec_contains_symbols (chrec_a)
2762 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2763 && !chrec_contains_symbols (chrec_b))
2765 /* testsuite/.../ssa-chrec-35.c
2766 {0, +, 1}_2 vs. {0, +, 1}_3
2767 the overlapping elements are respectively located at iterations:
2768 {0, +, 1}_x and {0, +, 1}_x,
2769 in other words, we have the equality:
2770 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2773 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2774 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2776 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2777 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2779 analyze_subscript_affine_affine (chrec_a, chrec_b,
2780 overlaps_a, overlaps_b, last_conflicts);
2782 if (CF_NOT_KNOWN_P (*overlaps_a)
2783 || CF_NOT_KNOWN_P (*overlaps_b))
2784 dependence_stats.num_miv_unimplemented++;
2785 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2786 || CF_NO_DEPENDENCE_P (*overlaps_b))
2787 dependence_stats.num_miv_independent++;
2789 dependence_stats.num_miv_dependent++;
2794 /* When the analysis is too difficult, answer "don't know". */
2795 if (dump_file && (dump_flags & TDF_DETAILS))
2796 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2798 *overlaps_a = conflict_fn_not_known ();
2799 *overlaps_b = conflict_fn_not_known ();
2800 *last_conflicts = chrec_dont_know;
2801 dependence_stats.num_miv_unimplemented++;
2804 if (dump_file && (dump_flags & TDF_DETAILS))
2805 fprintf (dump_file, ")\n");
2808 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2809 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2810 OVERLAP_ITERATIONS_B are initialized with two functions that
2811 describe the iterations that contain conflicting elements.
2813 Remark: For an integer k >= 0, the following equality is true:
2815 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2819 analyze_overlapping_iterations (tree chrec_a,
2821 conflict_function **overlap_iterations_a,
2822 conflict_function **overlap_iterations_b,
2823 tree *last_conflicts, struct loop *loop_nest)
2825 unsigned int lnn = loop_nest->num;
2827 dependence_stats.num_subscript_tests++;
2829 if (dump_file && (dump_flags & TDF_DETAILS))
2831 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2832 fprintf (dump_file, " (chrec_a = ");
2833 print_generic_expr (dump_file, chrec_a, 0);
2834 fprintf (dump_file, ")\n (chrec_b = ");
2835 print_generic_expr (dump_file, chrec_b, 0);
2836 fprintf (dump_file, ")\n");
2839 if (chrec_a == NULL_TREE
2840 || chrec_b == NULL_TREE
2841 || chrec_contains_undetermined (chrec_a)
2842 || chrec_contains_undetermined (chrec_b))
2844 dependence_stats.num_subscript_undetermined++;
2846 *overlap_iterations_a = conflict_fn_not_known ();
2847 *overlap_iterations_b = conflict_fn_not_known ();
2850 /* If they are the same chrec, and are affine, they overlap
2851 on every iteration. */
2852 else if (eq_evolutions_p (chrec_a, chrec_b)
2853 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2854 || operand_equal_p (chrec_a, chrec_b, 0)))
2856 dependence_stats.num_same_subscript_function++;
2857 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2858 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2859 *last_conflicts = chrec_dont_know;
2862 /* If they aren't the same, and aren't affine, we can't do anything
2864 else if ((chrec_contains_symbols (chrec_a)
2865 || chrec_contains_symbols (chrec_b))
2866 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2867 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2869 dependence_stats.num_subscript_undetermined++;
2870 *overlap_iterations_a = conflict_fn_not_known ();
2871 *overlap_iterations_b = conflict_fn_not_known ();
2874 else if (ziv_subscript_p (chrec_a, chrec_b))
2875 analyze_ziv_subscript (chrec_a, chrec_b,
2876 overlap_iterations_a, overlap_iterations_b,
2879 else if (siv_subscript_p (chrec_a, chrec_b))
2880 analyze_siv_subscript (chrec_a, chrec_b,
2881 overlap_iterations_a, overlap_iterations_b,
2882 last_conflicts, lnn);
2885 analyze_miv_subscript (chrec_a, chrec_b,
2886 overlap_iterations_a, overlap_iterations_b,
2887 last_conflicts, loop_nest);
2889 if (dump_file && (dump_flags & TDF_DETAILS))
2891 fprintf (dump_file, " (overlap_iterations_a = ");
2892 dump_conflict_function (dump_file, *overlap_iterations_a);
2893 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2894 dump_conflict_function (dump_file, *overlap_iterations_b);
2895 fprintf (dump_file, ")\n");
2896 fprintf (dump_file, ")\n");
2900 /* Helper function for uniquely inserting distance vectors. */
2903 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2908 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2909 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2912 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2915 /* Helper function for uniquely inserting direction vectors. */
2918 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2923 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2924 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2927 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2930 /* Add a distance of 1 on all the loops outer than INDEX. If we
2931 haven't yet determined a distance for this outer loop, push a new
2932 distance vector composed of the previous distance, and a distance
2933 of 1 for this outer loop. Example:
2941 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2942 save (0, 1), then we have to save (1, 0). */
2945 add_outer_distances (struct data_dependence_relation *ddr,
2946 lambda_vector dist_v, int index)
2948 /* For each outer loop where init_v is not set, the accesses are
2949 in dependence of distance 1 in the loop. */
2950 while (--index >= 0)
2952 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2953 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2955 save_dist_v (ddr, save_v);
2959 /* Return false when fail to represent the data dependence as a
2960 distance vector. INIT_B is set to true when a component has been
2961 added to the distance vector DIST_V. INDEX_CARRY is then set to
2962 the index in DIST_V that carries the dependence. */
2965 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2966 struct data_reference *ddr_a,
2967 struct data_reference *ddr_b,
2968 lambda_vector dist_v, bool *init_b,
2972 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2974 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2976 tree access_fn_a, access_fn_b;
2977 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2979 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2981 non_affine_dependence_relation (ddr);
2985 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2986 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2988 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2989 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2992 int var_a = CHREC_VARIABLE (access_fn_a);
2993 int var_b = CHREC_VARIABLE (access_fn_b);
2996 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2998 non_affine_dependence_relation (ddr);
3002 dist = int_cst_value (SUB_DISTANCE (subscript));
3003 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3004 *index_carry = MIN (index, *index_carry);
3006 /* This is the subscript coupling test. If we have already
3007 recorded a distance for this loop (a distance coming from
3008 another subscript), it should be the same. For example,
3009 in the following code, there is no dependence:
3016 if (init_v[index] != 0 && dist_v[index] != dist)
3018 finalize_ddr_dependent (ddr, chrec_known);
3022 dist_v[index] = dist;
3026 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3028 /* This can be for example an affine vs. constant dependence
3029 (T[i] vs. T[3]) that is not an affine dependence and is
3030 not representable as a distance vector. */
3031 non_affine_dependence_relation (ddr);
3039 /* Return true when the DDR contains only constant access functions. */
3042 constant_access_functions (const struct data_dependence_relation *ddr)
3046 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3047 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3048 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3054 /* Helper function for the case where DDR_A and DDR_B are the same
3055 multivariate access function with a constant step. For an example
3059 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3062 tree c_1 = CHREC_LEFT (c_2);
3063 tree c_0 = CHREC_LEFT (c_1);
3064 lambda_vector dist_v;
3067 /* Polynomials with more than 2 variables are not handled yet. When
3068 the evolution steps are parameters, it is not possible to
3069 represent the dependence using classical distance vectors. */
3070 if (TREE_CODE (c_0) != INTEGER_CST
3071 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3072 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3074 DDR_AFFINE_P (ddr) = false;
3078 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3079 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3081 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3082 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3083 v1 = int_cst_value (CHREC_RIGHT (c_1));
3084 v2 = int_cst_value (CHREC_RIGHT (c_2));
3097 save_dist_v (ddr, dist_v);
3099 add_outer_distances (ddr, dist_v, x_1);
3102 /* Helper function for the case where DDR_A and DDR_B are the same
3103 access functions. */
3106 add_other_self_distances (struct data_dependence_relation *ddr)
3108 lambda_vector dist_v;
3110 int index_carry = DDR_NB_LOOPS (ddr);
3112 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3114 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3116 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3118 if (!evolution_function_is_univariate_p (access_fun))
3120 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3122 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3126 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3128 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3129 add_multivariate_self_dist (ddr, access_fun);
3131 /* The evolution step is not constant: it varies in
3132 the outer loop, so this cannot be represented by a
3133 distance vector. For example in pr34635.c the
3134 evolution is {0, +, {0, +, 4}_1}_2. */
3135 DDR_AFFINE_P (ddr) = false;
3140 index_carry = MIN (index_carry,
3141 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3142 DDR_LOOP_NEST (ddr)));
3146 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3147 add_outer_distances (ddr, dist_v, index_carry);
3151 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3153 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3155 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3156 save_dist_v (ddr, dist_v);
3159 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3160 is the case for example when access functions are the same and
3161 equal to a constant, as in:
3168 in which case the distance vectors are (0) and (1). */
3171 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3175 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3177 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3178 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3179 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3181 for (j = 0; j < ca->n; j++)
3182 if (affine_function_zero_p (ca->fns[j]))
3184 insert_innermost_unit_dist_vector (ddr);
3188 for (j = 0; j < cb->n; j++)
3189 if (affine_function_zero_p (cb->fns[j]))
3191 insert_innermost_unit_dist_vector (ddr);
3197 /* Compute the classic per loop distance vector. DDR is the data
3198 dependence relation to build a vector from. Return false when fail
3199 to represent the data dependence as a distance vector. */
3202 build_classic_dist_vector (struct data_dependence_relation *ddr,
3203 struct loop *loop_nest)
3205 bool init_b = false;
3206 int index_carry = DDR_NB_LOOPS (ddr);
3207 lambda_vector dist_v;
3209 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3212 if (same_access_functions (ddr))
3214 /* Save the 0 vector. */
3215 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3216 save_dist_v (ddr, dist_v);
3218 if (constant_access_functions (ddr))
3219 add_distance_for_zero_overlaps (ddr);
3221 if (DDR_NB_LOOPS (ddr) > 1)
3222 add_other_self_distances (ddr);
3227 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3228 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3229 dist_v, &init_b, &index_carry))
3232 /* Save the distance vector if we initialized one. */
3235 /* Verify a basic constraint: classic distance vectors should
3236 always be lexicographically positive.
3238 Data references are collected in the order of execution of
3239 the program, thus for the following loop
3241 | for (i = 1; i < 100; i++)
3242 | for (j = 1; j < 100; j++)
3244 | t = T[j+1][i-1]; // A
3245 | T[j][i] = t + 2; // B
3248 references are collected following the direction of the wind:
3249 A then B. The data dependence tests are performed also
3250 following this order, such that we're looking at the distance
3251 separating the elements accessed by A from the elements later
3252 accessed by B. But in this example, the distance returned by
3253 test_dep (A, B) is lexicographically negative (-1, 1), that
3254 means that the access A occurs later than B with respect to
3255 the outer loop, ie. we're actually looking upwind. In this
3256 case we solve test_dep (B, A) looking downwind to the
3257 lexicographically positive solution, that returns the
3258 distance vector (1, -1). */
3259 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3261 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3262 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3265 compute_subscript_distance (ddr);
3266 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3267 save_v, &init_b, &index_carry))
3269 save_dist_v (ddr, save_v);
3270 DDR_REVERSED_P (ddr) = true;
3272 /* In this case there is a dependence forward for all the
3275 | for (k = 1; k < 100; k++)
3276 | for (i = 1; i < 100; i++)
3277 | for (j = 1; j < 100; j++)
3279 | t = T[j+1][i-1]; // A
3280 | T[j][i] = t + 2; // B
3288 if (DDR_NB_LOOPS (ddr) > 1)
3290 add_outer_distances (ddr, save_v, index_carry);
3291 add_outer_distances (ddr, dist_v, index_carry);
3296 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3297 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3299 if (DDR_NB_LOOPS (ddr) > 1)
3301 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3303 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3304 DDR_A (ddr), loop_nest))
3306 compute_subscript_distance (ddr);
3307 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3308 opposite_v, &init_b,
3312 save_dist_v (ddr, save_v);
3313 add_outer_distances (ddr, dist_v, index_carry);
3314 add_outer_distances (ddr, opposite_v, index_carry);
3317 save_dist_v (ddr, save_v);
3322 /* There is a distance of 1 on all the outer loops: Example:
3323 there is a dependence of distance 1 on loop_1 for the array A.
3329 add_outer_distances (ddr, dist_v,
3330 lambda_vector_first_nz (dist_v,
3331 DDR_NB_LOOPS (ddr), 0));
3334 if (dump_file && (dump_flags & TDF_DETAILS))
3338 fprintf (dump_file, "(build_classic_dist_vector\n");
3339 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3341 fprintf (dump_file, " dist_vector = (");
3342 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3343 DDR_NB_LOOPS (ddr));
3344 fprintf (dump_file, " )\n");
3346 fprintf (dump_file, ")\n");
3352 /* Return the direction for a given distance.
3353 FIXME: Computing dir this way is suboptimal, since dir can catch
3354 cases that dist is unable to represent. */
3356 static inline enum data_dependence_direction
3357 dir_from_dist (int dist)
3360 return dir_positive;
3362 return dir_negative;
3367 /* Compute the classic per loop direction vector. DDR is the data
3368 dependence relation to build a vector from. */
3371 build_classic_dir_vector (struct data_dependence_relation *ddr)
3374 lambda_vector dist_v;
3376 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3378 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3380 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3381 dir_v[j] = dir_from_dist (dist_v[j]);
3383 save_dir_v (ddr, dir_v);
3387 /* Helper function. Returns true when there is a dependence between
3388 data references DRA and DRB. */
3391 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3392 struct data_reference *dra,
3393 struct data_reference *drb,
3394 struct loop *loop_nest)
3397 tree last_conflicts;
3398 struct subscript *subscript;
3400 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3403 conflict_function *overlaps_a, *overlaps_b;
3405 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3406 DR_ACCESS_FN (drb, i),
3407 &overlaps_a, &overlaps_b,
3408 &last_conflicts, loop_nest);
3410 if (CF_NOT_KNOWN_P (overlaps_a)
3411 || CF_NOT_KNOWN_P (overlaps_b))
3413 finalize_ddr_dependent (ddr, chrec_dont_know);
3414 dependence_stats.num_dependence_undetermined++;
3415 free_conflict_function (overlaps_a);
3416 free_conflict_function (overlaps_b);
3420 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3421 || CF_NO_DEPENDENCE_P (overlaps_b))
3423 finalize_ddr_dependent (ddr, chrec_known);
3424 dependence_stats.num_dependence_independent++;
3425 free_conflict_function (overlaps_a);
3426 free_conflict_function (overlaps_b);
3432 if (SUB_CONFLICTS_IN_A (subscript))
3433 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3434 if (SUB_CONFLICTS_IN_B (subscript))
3435 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3437 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3438 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3439 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3446 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3449 subscript_dependence_tester (struct data_dependence_relation *ddr,
3450 struct loop *loop_nest)
3453 if (dump_file && (dump_flags & TDF_DETAILS))
3454 fprintf (dump_file, "(subscript_dependence_tester \n");
3456 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3457 dependence_stats.num_dependence_dependent++;
3459 compute_subscript_distance (ddr);
3460 if (build_classic_dist_vector (ddr, loop_nest))
3461 build_classic_dir_vector (ddr);
3463 if (dump_file && (dump_flags & TDF_DETAILS))
3464 fprintf (dump_file, ")\n");
3467 /* Returns true when all the access functions of A are affine or
3468 constant with respect to LOOP_NEST. */
3471 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3472 const struct loop *loop_nest)
3475 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3478 FOR_EACH_VEC_ELT (tree, fns, i, t)
3479 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3480 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3486 /* Initializes an equation for an OMEGA problem using the information
3487 contained in the ACCESS_FUN. Returns true when the operation
3490 PB is the omega constraint system.
3491 EQ is the number of the equation to be initialized.
3492 OFFSET is used for shifting the variables names in the constraints:
3493 a constrain is composed of 2 * the number of variables surrounding
3494 dependence accesses. OFFSET is set either to 0 for the first n variables,
3495 then it is set to n.
3496 ACCESS_FUN is expected to be an affine chrec. */
3499 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3500 unsigned int offset, tree access_fun,
3501 struct data_dependence_relation *ddr)
3503 switch (TREE_CODE (access_fun))
3505 case POLYNOMIAL_CHREC:
3507 tree left = CHREC_LEFT (access_fun);
3508 tree right = CHREC_RIGHT (access_fun);
3509 int var = CHREC_VARIABLE (access_fun);
3512 if (TREE_CODE (right) != INTEGER_CST)
3515 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3516 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3518 /* Compute the innermost loop index. */
3519 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3522 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3523 += int_cst_value (right);
3525 switch (TREE_CODE (left))
3527 case POLYNOMIAL_CHREC:
3528 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3531 pb->eqs[eq].coef[0] += int_cst_value (left);
3540 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3548 /* As explained in the comments preceding init_omega_for_ddr, we have
3549 to set up a system for each loop level, setting outer loops
3550 variation to zero, and current loop variation to positive or zero.
3551 Save each lexico positive distance vector. */
3554 omega_extract_distance_vectors (omega_pb pb,
3555 struct data_dependence_relation *ddr)
3559 struct loop *loopi, *loopj;
3560 enum omega_result res;
3562 /* Set a new problem for each loop in the nest. The basis is the
3563 problem that we have initialized until now. On top of this we
3564 add new constraints. */
3565 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3566 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3569 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3570 DDR_NB_LOOPS (ddr));
3572 omega_copy_problem (copy, pb);
3574 /* For all the outer loops "loop_j", add "dj = 0". */
3576 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3578 eq = omega_add_zero_eq (copy, omega_black);
3579 copy->eqs[eq].coef[j + 1] = 1;
3582 /* For "loop_i", add "0 <= di". */
3583 geq = omega_add_zero_geq (copy, omega_black);
3584 copy->geqs[geq].coef[i + 1] = 1;
3586 /* Reduce the constraint system, and test that the current
3587 problem is feasible. */
3588 res = omega_simplify_problem (copy);
3589 if (res == omega_false
3590 || res == omega_unknown
3591 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3594 for (eq = 0; eq < copy->num_subs; eq++)
3595 if (copy->subs[eq].key == (int) i + 1)
3597 dist = copy->subs[eq].coef[0];
3603 /* Reinitialize problem... */
3604 omega_copy_problem (copy, pb);
3606 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3608 eq = omega_add_zero_eq (copy, omega_black);
3609 copy->eqs[eq].coef[j + 1] = 1;
3612 /* ..., but this time "di = 1". */
3613 eq = omega_add_zero_eq (copy, omega_black);
3614 copy->eqs[eq].coef[i + 1] = 1;
3615 copy->eqs[eq].coef[0] = -1;
3617 res = omega_simplify_problem (copy);
3618 if (res == omega_false
3619 || res == omega_unknown
3620 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3623 for (eq = 0; eq < copy->num_subs; eq++)
3624 if (copy->subs[eq].key == (int) i + 1)
3626 dist = copy->subs[eq].coef[0];
3632 /* Save the lexicographically positive distance vector. */
3635 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3636 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3640 for (eq = 0; eq < copy->num_subs; eq++)
3641 if (copy->subs[eq].key > 0)
3643 dist = copy->subs[eq].coef[0];
3644 dist_v[copy->subs[eq].key - 1] = dist;
3647 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3648 dir_v[j] = dir_from_dist (dist_v[j]);
3650 save_dist_v (ddr, dist_v);
3651 save_dir_v (ddr, dir_v);
3655 omega_free_problem (copy);
3659 /* This is called for each subscript of a tuple of data references:
3660 insert an equality for representing the conflicts. */
3663 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3664 struct data_dependence_relation *ddr,
3665 omega_pb pb, bool *maybe_dependent)
3668 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3669 TREE_TYPE (access_fun_b));
3670 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3671 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3672 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3675 /* When the fun_a - fun_b is not constant, the dependence is not
3676 captured by the classic distance vector representation. */
3677 if (TREE_CODE (difference) != INTEGER_CST)
3681 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3683 /* There is no dependence. */
3684 *maybe_dependent = false;
3688 minus_one = build_int_cst (type, -1);
3689 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3691 eq = omega_add_zero_eq (pb, omega_black);
3692 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3693 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3694 /* There is probably a dependence, but the system of
3695 constraints cannot be built: answer "don't know". */
3699 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3700 && !int_divides_p (lambda_vector_gcd
3701 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3702 2 * DDR_NB_LOOPS (ddr)),
3703 pb->eqs[eq].coef[0]))
3705 /* There is no dependence. */
3706 *maybe_dependent = false;
3713 /* Helper function, same as init_omega_for_ddr but specialized for
3714 data references A and B. */
3717 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3718 struct data_dependence_relation *ddr,
3719 omega_pb pb, bool *maybe_dependent)
3724 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3726 /* Insert an equality per subscript. */
3727 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3729 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3730 ddr, pb, maybe_dependent))
3732 else if (*maybe_dependent == false)
3734 /* There is no dependence. */
3735 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3740 /* Insert inequalities: constraints corresponding to the iteration
3741 domain, i.e. the loops surrounding the references "loop_x" and
3742 the distance variables "dx". The layout of the OMEGA
3743 representation is as follows:
3744 - coef[0] is the constant
3745 - coef[1..nb_loops] are the protected variables that will not be
3746 removed by the solver: the "dx"
3747 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3749 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3750 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3752 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3755 ineq = omega_add_zero_geq (pb, omega_black);
3756 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3758 /* 0 <= loop_x + dx */
3759 ineq = omega_add_zero_geq (pb, omega_black);
3760 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3761 pb->geqs[ineq].coef[i + 1] = 1;
3765 /* loop_x <= nb_iters */
3766 ineq = omega_add_zero_geq (pb, omega_black);
3767 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3768 pb->geqs[ineq].coef[0] = nbi;
3770 /* loop_x + dx <= nb_iters */
3771 ineq = omega_add_zero_geq (pb, omega_black);
3772 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3773 pb->geqs[ineq].coef[i + 1] = -1;
3774 pb->geqs[ineq].coef[0] = nbi;
3776 /* A step "dx" bigger than nb_iters is not feasible, so
3777 add "0 <= nb_iters + dx", */
3778 ineq = omega_add_zero_geq (pb, omega_black);
3779 pb->geqs[ineq].coef[i + 1] = 1;
3780 pb->geqs[ineq].coef[0] = nbi;
3781 /* and "dx <= nb_iters". */
3782 ineq = omega_add_zero_geq (pb, omega_black);
3783 pb->geqs[ineq].coef[i + 1] = -1;
3784 pb->geqs[ineq].coef[0] = nbi;
3788 omega_extract_distance_vectors (pb, ddr);
3793 /* Sets up the Omega dependence problem for the data dependence
3794 relation DDR. Returns false when the constraint system cannot be
3795 built, ie. when the test answers "don't know". Returns true
3796 otherwise, and when independence has been proved (using one of the
3797 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3798 set MAYBE_DEPENDENT to true.
3800 Example: for setting up the dependence system corresponding to the
3801 conflicting accesses
3806 | ... A[2*j, 2*(i + j)]
3810 the following constraints come from the iteration domain:
3817 where di, dj are the distance variables. The constraints
3818 representing the conflicting elements are:
3821 i + 1 = 2 * (i + di + j + dj)
3823 For asking that the resulting distance vector (di, dj) be
3824 lexicographically positive, we insert the constraint "di >= 0". If
3825 "di = 0" in the solution, we fix that component to zero, and we
3826 look at the inner loops: we set a new problem where all the outer
3827 loop distances are zero, and fix this inner component to be
3828 positive. When one of the components is positive, we save that
3829 distance, and set a new problem where the distance on this loop is
3830 zero, searching for other distances in the inner loops. Here is
3831 the classic example that illustrates that we have to set for each
3832 inner loop a new problem:
3840 we have to save two distances (1, 0) and (0, 1).
3842 Given two array references, refA and refB, we have to set the
3843 dependence problem twice, refA vs. refB and refB vs. refA, and we
3844 cannot do a single test, as refB might occur before refA in the
3845 inner loops, and the contrary when considering outer loops: ex.
3850 | T[{1,+,1}_2][{1,+,1}_1] // refA
3851 | T[{2,+,1}_2][{0,+,1}_1] // refB
3856 refB touches the elements in T before refA, and thus for the same
3857 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3858 but for successive loop_0 iterations, we have (1, -1, 1)
3860 The Omega solver expects the distance variables ("di" in the
3861 previous example) to come first in the constraint system (as
3862 variables to be protected, or "safe" variables), the constraint
3863 system is built using the following layout:
3865 "cst | distance vars | index vars".
3869 init_omega_for_ddr (struct data_dependence_relation *ddr,
3870 bool *maybe_dependent)
3875 *maybe_dependent = true;
3877 if (same_access_functions (ddr))
3880 lambda_vector dir_v;
3882 /* Save the 0 vector. */
3883 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3884 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3885 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3886 dir_v[j] = dir_equal;
3887 save_dir_v (ddr, dir_v);
3889 /* Save the dependences carried by outer loops. */
3890 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3891 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3893 omega_free_problem (pb);
3897 /* Omega expects the protected variables (those that have to be kept
3898 after elimination) to appear first in the constraint system.
3899 These variables are the distance variables. In the following
3900 initialization we declare NB_LOOPS safe variables, and the total
3901 number of variables for the constraint system is 2*NB_LOOPS. */
3902 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3903 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3905 omega_free_problem (pb);
3907 /* Stop computation if not decidable, or no dependence. */
3908 if (res == false || *maybe_dependent == false)
3911 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3912 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3914 omega_free_problem (pb);
3919 /* Return true when DDR contains the same information as that stored
3920 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3923 ddr_consistent_p (FILE *file,
3924 struct data_dependence_relation *ddr,
3925 VEC (lambda_vector, heap) *dist_vects,
3926 VEC (lambda_vector, heap) *dir_vects)
3930 /* If dump_file is set, output there. */
3931 if (dump_file && (dump_flags & TDF_DETAILS))
3934 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3936 lambda_vector b_dist_v;
3937 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3938 VEC_length (lambda_vector, dist_vects),
3939 DDR_NUM_DIST_VECTS (ddr));
3941 fprintf (file, "Banerjee dist vectors:\n");
3942 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3943 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3945 fprintf (file, "Omega dist vectors:\n");
3946 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3947 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3949 fprintf (file, "data dependence relation:\n");
3950 dump_data_dependence_relation (file, ddr);
3952 fprintf (file, ")\n");
3956 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3958 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3959 VEC_length (lambda_vector, dir_vects),
3960 DDR_NUM_DIR_VECTS (ddr));
3964 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3966 lambda_vector a_dist_v;
3967 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3969 /* Distance vectors are not ordered in the same way in the DDR
3970 and in the DIST_VECTS: search for a matching vector. */
3971 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3972 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3975 if (j == VEC_length (lambda_vector, dist_vects))
3977 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3978 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3979 fprintf (file, "not found in Omega dist vectors:\n");
3980 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3981 fprintf (file, "data dependence relation:\n");
3982 dump_data_dependence_relation (file, ddr);
3983 fprintf (file, ")\n");
3987 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3989 lambda_vector a_dir_v;
3990 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3992 /* Direction vectors are not ordered in the same way in the DDR
3993 and in the DIR_VECTS: search for a matching vector. */
3994 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3995 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3998 if (j == VEC_length (lambda_vector, dist_vects))
4000 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4001 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4002 fprintf (file, "not found in Omega dir vectors:\n");
4003 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4004 fprintf (file, "data dependence relation:\n");
4005 dump_data_dependence_relation (file, ddr);
4006 fprintf (file, ")\n");
4013 /* This computes the affine dependence relation between A and B with
4014 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
4015 independence between two accesses, while CHREC_DONT_KNOW is used
4016 for representing the unknown relation.
4018 Note that it is possible to stop the computation of the dependence
4019 relation the first time we detect a CHREC_KNOWN element for a given
4023 compute_affine_dependence (struct data_dependence_relation *ddr,
4024 struct loop *loop_nest)
4026 struct data_reference *dra = DDR_A (ddr);
4027 struct data_reference *drb = DDR_B (ddr);
4029 if (dump_file && (dump_flags & TDF_DETAILS))
4031 fprintf (dump_file, "(compute_affine_dependence\n");
4032 fprintf (dump_file, " (stmt_a = \n");
4033 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
4034 fprintf (dump_file, ")\n (stmt_b = \n");
4035 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
4036 fprintf (dump_file, ")\n");
4039 /* Analyze only when the dependence relation is not yet known. */
4040 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
4041 && !DDR_SELF_REFERENCE (ddr))
4043 dependence_stats.num_dependence_tests++;
4045 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4046 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4048 if (flag_check_data_deps)
4050 /* Compute the dependences using the first algorithm. */
4051 subscript_dependence_tester (ddr, loop_nest);
4053 if (dump_file && (dump_flags & TDF_DETAILS))
4055 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4056 dump_data_dependence_relation (dump_file, ddr);
4059 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4061 bool maybe_dependent;
4062 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4064 /* Save the result of the first DD analyzer. */
4065 dist_vects = DDR_DIST_VECTS (ddr);
4066 dir_vects = DDR_DIR_VECTS (ddr);
4068 /* Reset the information. */
4069 DDR_DIST_VECTS (ddr) = NULL;
4070 DDR_DIR_VECTS (ddr) = NULL;
4072 /* Compute the same information using Omega. */
4073 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4074 goto csys_dont_know;
4076 if (dump_file && (dump_flags & TDF_DETAILS))
4078 fprintf (dump_file, "Omega Analyzer\n");
4079 dump_data_dependence_relation (dump_file, ddr);
4082 /* Check that we get the same information. */
4083 if (maybe_dependent)
4084 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4089 subscript_dependence_tester (ddr, loop_nest);
4092 /* As a last case, if the dependence cannot be determined, or if
4093 the dependence is considered too difficult to determine, answer
4098 dependence_stats.num_dependence_undetermined++;
4100 if (dump_file && (dump_flags & TDF_DETAILS))
4102 fprintf (dump_file, "Data ref a:\n");
4103 dump_data_reference (dump_file, dra);
4104 fprintf (dump_file, "Data ref b:\n");
4105 dump_data_reference (dump_file, drb);
4106 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4108 finalize_ddr_dependent (ddr, chrec_dont_know);
4112 if (dump_file && (dump_flags & TDF_DETAILS))
4113 fprintf (dump_file, ")\n");
4116 /* This computes the dependence relation for the same data
4117 reference into DDR. */
4120 compute_self_dependence (struct data_dependence_relation *ddr)
4123 struct subscript *subscript;
4125 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4128 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4131 if (SUB_CONFLICTS_IN_A (subscript))
4132 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4133 if (SUB_CONFLICTS_IN_B (subscript))
4134 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4136 /* The accessed index overlaps for each iteration. */
4137 SUB_CONFLICTS_IN_A (subscript)
4138 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4139 SUB_CONFLICTS_IN_B (subscript)
4140 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4141 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4144 /* The distance vector is the zero vector. */
4145 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4146 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4149 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4150 the data references in DATAREFS, in the LOOP_NEST. When
4151 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4155 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4156 VEC (ddr_p, heap) **dependence_relations,
4157 VEC (loop_p, heap) *loop_nest,
4158 bool compute_self_and_rr)
4160 struct data_dependence_relation *ddr;
4161 struct data_reference *a, *b;
4164 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4165 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4166 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4168 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4169 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4171 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4174 if (compute_self_and_rr)
4175 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4177 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4178 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4179 compute_self_dependence (ddr);
4183 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4184 true if STMT clobbers memory, false otherwise. */
4187 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4189 bool clobbers_memory = false;
4192 enum gimple_code stmt_code = gimple_code (stmt);
4196 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4197 Calls have side-effects, except those to const or pure
4199 if ((stmt_code == GIMPLE_CALL
4200 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4201 || (stmt_code == GIMPLE_ASM
4202 && gimple_asm_volatile_p (stmt)))
4203 clobbers_memory = true;
4205 if (!gimple_vuse (stmt))
4206 return clobbers_memory;
4208 if (stmt_code == GIMPLE_ASSIGN)
4211 op0 = gimple_assign_lhs_ptr (stmt);
4212 op1 = gimple_assign_rhs1_ptr (stmt);
4215 || (REFERENCE_CLASS_P (*op1)
4216 && (base = get_base_address (*op1))
4217 && TREE_CODE (base) != SSA_NAME))
4219 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4221 ref->is_read = true;
4224 else if (stmt_code == GIMPLE_CALL)
4228 op0 = gimple_call_lhs_ptr (stmt);
4229 n = gimple_call_num_args (stmt);
4230 for (i = 0; i < n; i++)
4232 op1 = gimple_call_arg_ptr (stmt, i);
4235 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4237 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4239 ref->is_read = true;
4244 return clobbers_memory;
4248 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4250 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4252 ref->is_read = false;
4254 return clobbers_memory;
4257 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4258 reference, returns false, otherwise returns true. NEST is the outermost
4259 loop of the loop nest in which the references should be analyzed. */
4262 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4263 VEC (data_reference_p, heap) **datarefs)
4266 VEC (data_ref_loc, heap) *references;
4269 data_reference_p dr;
4271 if (get_references_in_stmt (stmt, &references))
4273 VEC_free (data_ref_loc, heap, references);
4277 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4279 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4280 *ref->pos, stmt, ref->is_read);
4281 gcc_assert (dr != NULL);
4282 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4284 VEC_free (data_ref_loc, heap, references);
4288 /* Stores the data references in STMT to DATAREFS. If there is an
4289 unanalyzable reference, returns false, otherwise returns true.
4290 NEST is the outermost loop of the loop nest in which the references
4291 should be instantiated, LOOP is the loop in which the references
4292 should be analyzed. */
4295 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4296 VEC (data_reference_p, heap) **datarefs)
4299 VEC (data_ref_loc, heap) *references;
4302 data_reference_p dr;
4304 if (get_references_in_stmt (stmt, &references))
4306 VEC_free (data_ref_loc, heap, references);
4310 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4312 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4313 gcc_assert (dr != NULL);
4314 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4317 VEC_free (data_ref_loc, heap, references);
4321 /* Search the data references in LOOP, and record the information into
4322 DATAREFS. Returns chrec_dont_know when failing to analyze a
4323 difficult case, returns NULL_TREE otherwise. */
4326 find_data_references_in_bb (struct loop *loop, basic_block bb,
4327 VEC (data_reference_p, heap) **datarefs)
4329 gimple_stmt_iterator bsi;
4331 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4333 gimple stmt = gsi_stmt (bsi);
4335 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4337 struct data_reference *res;
4338 res = XCNEW (struct data_reference);
4339 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4341 return chrec_dont_know;
4348 /* Search the data references in LOOP, and record the information into
4349 DATAREFS. Returns chrec_dont_know when failing to analyze a
4350 difficult case, returns NULL_TREE otherwise.
4352 TODO: This function should be made smarter so that it can handle address
4353 arithmetic as if they were array accesses, etc. */
4356 find_data_references_in_loop (struct loop *loop,
4357 VEC (data_reference_p, heap) **datarefs)
4359 basic_block bb, *bbs;
4362 bbs = get_loop_body_in_dom_order (loop);
4364 for (i = 0; i < loop->num_nodes; i++)
4368 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4371 return chrec_dont_know;
4379 /* Recursive helper function. */
4382 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4384 /* Inner loops of the nest should not contain siblings. Example:
4385 when there are two consecutive loops,
4396 the dependence relation cannot be captured by the distance
4401 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4403 return find_loop_nest_1 (loop->inner, loop_nest);
4407 /* Return false when the LOOP is not well nested. Otherwise return
4408 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4409 contain the loops from the outermost to the innermost, as they will
4410 appear in the classic distance vector. */
4413 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4415 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4417 return find_loop_nest_1 (loop->inner, loop_nest);
4421 /* Returns true when the data dependences have been computed, false otherwise.
4422 Given a loop nest LOOP, the following vectors are returned:
4423 DATAREFS is initialized to all the array elements contained in this loop,
4424 DEPENDENCE_RELATIONS contains the relations between the data references.
4425 Compute read-read and self relations if
4426 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4429 compute_data_dependences_for_loop (struct loop *loop,
4430 bool compute_self_and_read_read_dependences,
4431 VEC (loop_p, heap) **loop_nest,
4432 VEC (data_reference_p, heap) **datarefs,
4433 VEC (ddr_p, heap) **dependence_relations)
4437 memset (&dependence_stats, 0, sizeof (dependence_stats));
4439 /* If the loop nest is not well formed, or one of the data references
4440 is not computable, give up without spending time to compute other
4443 || !find_loop_nest (loop, loop_nest)
4444 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4446 struct data_dependence_relation *ddr;
4448 /* Insert a single relation into dependence_relations:
4450 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4451 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4455 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4456 compute_self_and_read_read_dependences);
4458 if (dump_file && (dump_flags & TDF_STATS))
4460 fprintf (dump_file, "Dependence tester statistics:\n");
4462 fprintf (dump_file, "Number of dependence tests: %d\n",
4463 dependence_stats.num_dependence_tests);
4464 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4465 dependence_stats.num_dependence_dependent);
4466 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4467 dependence_stats.num_dependence_independent);
4468 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4469 dependence_stats.num_dependence_undetermined);
4471 fprintf (dump_file, "Number of subscript tests: %d\n",
4472 dependence_stats.num_subscript_tests);
4473 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4474 dependence_stats.num_subscript_undetermined);
4475 fprintf (dump_file, "Number of same subscript function: %d\n",
4476 dependence_stats.num_same_subscript_function);
4478 fprintf (dump_file, "Number of ziv tests: %d\n",
4479 dependence_stats.num_ziv);
4480 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4481 dependence_stats.num_ziv_dependent);
4482 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4483 dependence_stats.num_ziv_independent);
4484 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4485 dependence_stats.num_ziv_unimplemented);
4487 fprintf (dump_file, "Number of siv tests: %d\n",
4488 dependence_stats.num_siv);
4489 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4490 dependence_stats.num_siv_dependent);
4491 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4492 dependence_stats.num_siv_independent);
4493 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4494 dependence_stats.num_siv_unimplemented);
4496 fprintf (dump_file, "Number of miv tests: %d\n",
4497 dependence_stats.num_miv);
4498 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4499 dependence_stats.num_miv_dependent);
4500 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4501 dependence_stats.num_miv_independent);
4502 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4503 dependence_stats.num_miv_unimplemented);
4509 /* Returns true when the data dependences for the basic block BB have been
4510 computed, false otherwise.
4511 DATAREFS is initialized to all the array elements contained in this basic
4512 block, DEPENDENCE_RELATIONS contains the relations between the data
4513 references. Compute read-read and self relations if
4514 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4516 compute_data_dependences_for_bb (basic_block bb,
4517 bool compute_self_and_read_read_dependences,
4518 VEC (data_reference_p, heap) **datarefs,
4519 VEC (ddr_p, heap) **dependence_relations)
4521 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4524 compute_all_dependences (*datarefs, dependence_relations, NULL,
4525 compute_self_and_read_read_dependences);
4529 /* Entry point (for testing only). Analyze all the data references
4530 and the dependence relations in LOOP.
4532 The data references are computed first.
4534 A relation on these nodes is represented by a complete graph. Some
4535 of the relations could be of no interest, thus the relations can be
4538 In the following function we compute all the relations. This is
4539 just a first implementation that is here for:
4540 - for showing how to ask for the dependence relations,
4541 - for the debugging the whole dependence graph,
4542 - for the dejagnu testcases and maintenance.
4544 It is possible to ask only for a part of the graph, avoiding to
4545 compute the whole dependence graph. The computed dependences are
4546 stored in a knowledge base (KB) such that later queries don't
4547 recompute the same information. The implementation of this KB is
4548 transparent to the optimizer, and thus the KB can be changed with a
4549 more efficient implementation, or the KB could be disabled. */
4551 analyze_all_data_dependences (struct loop *loop)
4554 int nb_data_refs = 10;
4555 VEC (data_reference_p, heap) *datarefs =
4556 VEC_alloc (data_reference_p, heap, nb_data_refs);
4557 VEC (ddr_p, heap) *dependence_relations =
4558 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4559 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4561 /* Compute DDs on the whole function. */
4562 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4563 &dependence_relations);
4567 dump_data_dependence_relations (dump_file, dependence_relations);
4568 fprintf (dump_file, "\n\n");
4570 if (dump_flags & TDF_DETAILS)
4571 dump_dist_dir_vectors (dump_file, dependence_relations);
4573 if (dump_flags & TDF_STATS)
4575 unsigned nb_top_relations = 0;
4576 unsigned nb_bot_relations = 0;
4577 unsigned nb_chrec_relations = 0;
4578 struct data_dependence_relation *ddr;
4580 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4582 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4585 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4589 nb_chrec_relations++;
4592 gather_stats_on_scev_database ();
4596 VEC_free (loop_p, heap, loop_nest);
4597 free_dependence_relations (dependence_relations);
4598 free_data_refs (datarefs);
4601 /* Computes all the data dependences and check that the results of
4602 several analyzers are the same. */
4605 tree_check_data_deps (void)
4608 struct loop *loop_nest;
4610 FOR_EACH_LOOP (li, loop_nest, 0)
4611 analyze_all_data_dependences (loop_nest);
4614 /* Free the memory used by a data dependence relation DDR. */
4617 free_dependence_relation (struct data_dependence_relation *ddr)
4622 if (DDR_SUBSCRIPTS (ddr))
4623 free_subscripts (DDR_SUBSCRIPTS (ddr));
4624 if (DDR_DIST_VECTS (ddr))
4625 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4626 if (DDR_DIR_VECTS (ddr))
4627 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4632 /* Free the memory used by the data dependence relations from
4633 DEPENDENCE_RELATIONS. */
4636 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4639 struct data_dependence_relation *ddr;
4641 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4643 free_dependence_relation (ddr);
4645 VEC_free (ddr_p, heap, dependence_relations);
4648 /* Free the memory used by the data references from DATAREFS. */
4651 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4654 struct data_reference *dr;
4656 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4658 VEC_free (data_reference_p, heap, datarefs);
4663 /* Dump vertex I in RDG to FILE. */
4666 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4668 struct vertex *v = &(rdg->vertices[i]);
4669 struct graph_edge *e;
4671 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4672 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4673 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4676 for (e = v->pred; e; e = e->pred_next)
4677 fprintf (file, " %d", e->src);
4679 fprintf (file, ") (out:");
4682 for (e = v->succ; e; e = e->succ_next)
4683 fprintf (file, " %d", e->dest);
4685 fprintf (file, ")\n");
4686 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4687 fprintf (file, ")\n");
4690 /* Call dump_rdg_vertex on stderr. */
4693 debug_rdg_vertex (struct graph *rdg, int i)
4695 dump_rdg_vertex (stderr, rdg, i);
4698 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4699 dumped vertices to that bitmap. */
4701 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4705 fprintf (file, "(%d\n", c);
4707 for (i = 0; i < rdg->n_vertices; i++)
4708 if (rdg->vertices[i].component == c)
4711 bitmap_set_bit (dumped, i);
4713 dump_rdg_vertex (file, rdg, i);
4716 fprintf (file, ")\n");
4719 /* Call dump_rdg_vertex on stderr. */
4722 debug_rdg_component (struct graph *rdg, int c)
4724 dump_rdg_component (stderr, rdg, c, NULL);
4727 /* Dump the reduced dependence graph RDG to FILE. */
4730 dump_rdg (FILE *file, struct graph *rdg)
4733 bitmap dumped = BITMAP_ALLOC (NULL);
4735 fprintf (file, "(rdg\n");
4737 for (i = 0; i < rdg->n_vertices; i++)
4738 if (!bitmap_bit_p (dumped, i))
4739 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4741 fprintf (file, ")\n");
4742 BITMAP_FREE (dumped);
4745 /* Call dump_rdg on stderr. */
4748 debug_rdg (struct graph *rdg)
4750 dump_rdg (stderr, rdg);
4754 dot_rdg_1 (FILE *file, struct graph *rdg)
4758 fprintf (file, "digraph RDG {\n");
4760 for (i = 0; i < rdg->n_vertices; i++)
4762 struct vertex *v = &(rdg->vertices[i]);
4763 struct graph_edge *e;
4765 /* Highlight reads from memory. */
4766 if (RDG_MEM_READS_STMT (rdg, i))
4767 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4769 /* Highlight stores to memory. */
4770 if (RDG_MEM_WRITE_STMT (rdg, i))
4771 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4774 for (e = v->succ; e; e = e->succ_next)
4775 switch (RDGE_TYPE (e))
4778 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4782 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4786 /* These are the most common dependences: don't print these. */
4787 fprintf (file, "%d -> %d \n", i, e->dest);
4791 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4799 fprintf (file, "}\n\n");
4802 /* Display the Reduced Dependence Graph using dotty. */
4803 extern void dot_rdg (struct graph *);
4806 dot_rdg (struct graph *rdg)
4808 /* When debugging, enable the following code. This cannot be used
4809 in production compilers because it calls "system". */
4811 FILE *file = fopen ("/tmp/rdg.dot", "w");
4812 gcc_assert (file != NULL);
4814 dot_rdg_1 (file, rdg);
4817 system ("dotty /tmp/rdg.dot &");
4819 dot_rdg_1 (stderr, rdg);
4823 /* This structure is used for recording the mapping statement index in
4826 struct GTY(()) rdg_vertex_info
4832 /* Returns the index of STMT in RDG. */
4835 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4837 struct rdg_vertex_info rvi, *slot;
4840 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4848 /* Creates an edge in RDG for each distance vector from DDR. The
4849 order that we keep track of in the RDG is the order in which
4850 statements have to be executed. */
4853 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4855 struct graph_edge *e;
4857 data_reference_p dra = DDR_A (ddr);
4858 data_reference_p drb = DDR_B (ddr);
4859 unsigned level = ddr_dependence_level (ddr);
4861 /* For non scalar dependences, when the dependence is REVERSED,
4862 statement B has to be executed before statement A. */
4864 && !DDR_REVERSED_P (ddr))
4866 data_reference_p tmp = dra;
4871 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4872 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4874 if (va < 0 || vb < 0)
4877 e = add_edge (rdg, va, vb);
4878 e->data = XNEW (struct rdg_edge);
4880 RDGE_LEVEL (e) = level;
4881 RDGE_RELATION (e) = ddr;
4883 /* Determines the type of the data dependence. */
4884 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4885 RDGE_TYPE (e) = input_dd;
4886 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4887 RDGE_TYPE (e) = output_dd;
4888 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4889 RDGE_TYPE (e) = flow_dd;
4890 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4891 RDGE_TYPE (e) = anti_dd;
4894 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4895 the index of DEF in RDG. */
4898 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4900 use_operand_p imm_use_p;
4901 imm_use_iterator iterator;
4903 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4905 struct graph_edge *e;
4906 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4911 e = add_edge (rdg, idef, use);
4912 e->data = XNEW (struct rdg_edge);
4913 RDGE_TYPE (e) = flow_dd;
4914 RDGE_RELATION (e) = NULL;
4918 /* Creates the edges of the reduced dependence graph RDG. */
4921 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4924 struct data_dependence_relation *ddr;
4925 def_operand_p def_p;
4928 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4929 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4930 create_rdg_edge_for_ddr (rdg, ddr);
4932 for (i = 0; i < rdg->n_vertices; i++)
4933 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4935 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4938 /* Build the vertices of the reduced dependence graph RDG. */
4941 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4946 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4948 VEC (data_ref_loc, heap) *references;
4950 struct vertex *v = &(rdg->vertices[i]);
4951 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4952 struct rdg_vertex_info **slot;
4956 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4963 v->data = XNEW (struct rdg_vertex);
4964 RDG_STMT (rdg, i) = stmt;
4966 RDG_MEM_WRITE_STMT (rdg, i) = false;
4967 RDG_MEM_READS_STMT (rdg, i) = false;
4968 if (gimple_code (stmt) == GIMPLE_PHI)
4971 get_references_in_stmt (stmt, &references);
4972 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4974 RDG_MEM_WRITE_STMT (rdg, i) = true;
4976 RDG_MEM_READS_STMT (rdg, i) = true;
4978 VEC_free (data_ref_loc, heap, references);
4982 /* Initialize STMTS with all the statements of LOOP. When
4983 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4984 which we discover statements is important as
4985 generate_loops_for_partition is using the same traversal for
4986 identifying statements. */
4989 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4992 basic_block *bbs = get_loop_body_in_dom_order (loop);
4994 for (i = 0; i < loop->num_nodes; i++)
4996 basic_block bb = bbs[i];
4997 gimple_stmt_iterator bsi;
5000 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5001 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5003 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5005 stmt = gsi_stmt (bsi);
5006 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5007 VEC_safe_push (gimple, heap, *stmts, stmt);
5014 /* Returns true when all the dependences are computable. */
5017 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5022 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5023 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5029 /* Computes a hash function for element ELT. */
5032 hash_stmt_vertex_info (const void *elt)
5034 const struct rdg_vertex_info *const rvi =
5035 (const struct rdg_vertex_info *) elt;
5036 gimple stmt = rvi->stmt;
5038 return htab_hash_pointer (stmt);
5041 /* Compares database elements E1 and E2. */
5044 eq_stmt_vertex_info (const void *e1, const void *e2)
5046 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5047 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5049 return elt1->stmt == elt2->stmt;
5052 /* Free the element E. */
5055 hash_stmt_vertex_del (void *e)
5060 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5061 statement of the loop nest, and one edge per data dependence or
5062 scalar dependence. */
5065 build_empty_rdg (int n_stmts)
5067 int nb_data_refs = 10;
5068 struct graph *rdg = new_graph (n_stmts);
5070 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5071 eq_stmt_vertex_info, hash_stmt_vertex_del);
5075 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5076 statement of the loop nest, and one edge per data dependence or
5077 scalar dependence. */
5080 build_rdg (struct loop *loop,
5081 VEC (loop_p, heap) **loop_nest,
5082 VEC (ddr_p, heap) **dependence_relations,
5083 VEC (data_reference_p, heap) **datarefs)
5085 struct graph *rdg = NULL;
5086 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5088 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5089 dependence_relations);
5091 if (known_dependences_p (*dependence_relations))
5093 stmts_from_loop (loop, &stmts);
5094 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5095 create_rdg_vertices (rdg, stmts);
5096 create_rdg_edges (rdg, *dependence_relations);
5099 VEC_free (gimple, heap, stmts);
5103 /* Free the reduced dependence graph RDG. */
5106 free_rdg (struct graph *rdg)
5110 for (i = 0; i < rdg->n_vertices; i++)
5112 struct vertex *v = &(rdg->vertices[i]);
5113 struct graph_edge *e;
5115 for (e = v->succ; e; e = e->succ_next)
5121 htab_delete (rdg->indices);
5125 /* Initialize STMTS with all the statements of LOOP that contain a
5129 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5132 basic_block *bbs = get_loop_body_in_dom_order (loop);
5134 for (i = 0; i < loop->num_nodes; i++)
5136 basic_block bb = bbs[i];
5137 gimple_stmt_iterator bsi;
5139 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5140 if (gimple_vdef (gsi_stmt (bsi)))
5141 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5147 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5148 that contains a data reference on its LHS with a stride of the same
5149 size as its unit type. */
5152 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5156 struct data_reference *dr;
5159 || !gimple_vdef (stmt)
5160 || !is_gimple_assign (stmt)
5161 || !gimple_assign_single_p (stmt)
5162 || !(op1 = gimple_assign_rhs1 (stmt))
5163 || !(integer_zerop (op1) || real_zerop (op1)))
5166 dr = XCNEW (struct data_reference);
5167 op0 = gimple_assign_lhs (stmt);
5169 DR_STMT (dr) = stmt;
5172 res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
5173 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5179 /* Initialize STMTS with all the statements of LOOP that contain a
5180 store to memory of the form "A[i] = 0". */
5183 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5187 gimple_stmt_iterator si;
5189 basic_block *bbs = get_loop_body_in_dom_order (loop);
5191 for (i = 0; i < loop->num_nodes; i++)
5192 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5193 if ((stmt = gsi_stmt (si))
5194 && stmt_with_adjacent_zero_store_dr_p (stmt))
5195 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5200 /* For a data reference REF, return the declaration of its base
5201 address or NULL_TREE if the base is not determined. */
5204 ref_base_address (gimple stmt, data_ref_loc *ref)
5206 tree base = NULL_TREE;
5208 struct data_reference *dr = XCNEW (struct data_reference);
5210 DR_STMT (dr) = stmt;
5211 DR_REF (dr) = *ref->pos;
5212 dr_analyze_innermost (dr, loop_containing_stmt (stmt));
5213 base_address = DR_BASE_ADDRESS (dr);
5218 switch (TREE_CODE (base_address))
5221 base = TREE_OPERAND (base_address, 0);
5225 base = base_address;
5234 /* Determines whether the statement from vertex V of the RDG has a
5235 definition used outside the loop that contains this statement. */
5238 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5240 gimple stmt = RDG_STMT (rdg, v);
5241 struct loop *loop = loop_containing_stmt (stmt);
5242 use_operand_p imm_use_p;
5243 imm_use_iterator iterator;
5245 def_operand_p def_p;
5250 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5252 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5254 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5262 /* Determines whether statements S1 and S2 access to similar memory
5263 locations. Two memory accesses are considered similar when they
5264 have the same base address declaration, i.e. when their
5265 ref_base_address is the same. */
5268 have_similar_memory_accesses (gimple s1, gimple s2)
5272 VEC (data_ref_loc, heap) *refs1, *refs2;
5273 data_ref_loc *ref1, *ref2;
5275 get_references_in_stmt (s1, &refs1);
5276 get_references_in_stmt (s2, &refs2);
5278 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5280 tree base1 = ref_base_address (s1, ref1);
5283 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5284 if (base1 == ref_base_address (s2, ref2))
5292 VEC_free (data_ref_loc, heap, refs1);
5293 VEC_free (data_ref_loc, heap, refs2);
5297 /* Helper function for the hashtab. */
5300 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5302 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5303 CONST_CAST_GIMPLE ((const_gimple) s2));
5306 /* Helper function for the hashtab. */
5309 ref_base_address_1 (const void *s)
5311 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5313 VEC (data_ref_loc, heap) *refs;
5317 get_references_in_stmt (stmt, &refs);
5319 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5322 res = htab_hash_pointer (ref_base_address (stmt, ref));
5326 VEC_free (data_ref_loc, heap, refs);
5330 /* Try to remove duplicated write data references from STMTS. */
5333 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5337 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5338 have_similar_memory_accesses_1, NULL);
5340 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5344 slot = htab_find_slot (seen, stmt, INSERT);
5347 VEC_ordered_remove (gimple, *stmts, i);
5350 *slot = (void *) stmt;
5358 /* Returns the index of PARAMETER in the parameters vector of the
5359 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5362 access_matrix_get_index_for_parameter (tree parameter,
5363 struct access_matrix *access_matrix)
5366 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5367 tree lambda_parameter;
5369 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5370 if (lambda_parameter == parameter)
5371 return i + AM_NB_INDUCTION_VARS (access_matrix);