1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
49 - to define an interface to access this data.
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
62 has an integer solution x = 1 and y = -1.
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
78 #include "coretypes.h"
83 /* These RTL headers are needed for basic-block.h. */
85 #include "basic-block.h"
86 #include "diagnostic.h"
87 #include "tree-flow.h"
88 #include "tree-dump.h"
91 #include "tree-chrec.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "tree-pass.h"
95 #include "langhooks.h"
97 static struct datadep_stats
99 int num_dependence_tests;
100 int num_dependence_dependent;
101 int num_dependence_independent;
102 int num_dependence_undetermined;
104 int num_subscript_tests;
105 int num_subscript_undetermined;
106 int num_same_subscript_function;
109 int num_ziv_independent;
110 int num_ziv_dependent;
111 int num_ziv_unimplemented;
114 int num_siv_independent;
115 int num_siv_dependent;
116 int num_siv_unimplemented;
119 int num_miv_independent;
120 int num_miv_dependent;
121 int num_miv_unimplemented;
124 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
125 struct data_reference *,
126 struct data_reference *,
128 /* Returns true iff A divides B. */
131 tree_fold_divides_p (const_tree a, const_tree b)
133 gcc_assert (TREE_CODE (a) == INTEGER_CST);
134 gcc_assert (TREE_CODE (b) == INTEGER_CST);
135 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
138 /* Returns true iff A divides B. */
141 int_divides_p (int a, int b)
143 return ((b % a) == 0);
148 /* Dump into FILE all the data references from DATAREFS. */
151 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
154 struct data_reference *dr;
156 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
157 dump_data_reference (file, dr);
160 /* Dump into FILE all the dependence relations from DDRS. */
163 dump_data_dependence_relations (FILE *file,
164 VEC (ddr_p, heap) *ddrs)
167 struct data_dependence_relation *ddr;
169 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
170 dump_data_dependence_relation (file, ddr);
173 /* Dump function for a DATA_REFERENCE structure. */
176 dump_data_reference (FILE *outf,
177 struct data_reference *dr)
181 fprintf (outf, "(Data Ref: \n stmt: ");
182 print_generic_stmt (outf, DR_STMT (dr), 0);
183 fprintf (outf, " ref: ");
184 print_generic_stmt (outf, DR_REF (dr), 0);
185 fprintf (outf, " base_object: ");
186 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
188 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
190 fprintf (outf, " Access function %d: ", i);
191 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
193 fprintf (outf, ")\n");
196 /* Dumps the affine function described by FN to the file OUTF. */
199 dump_affine_function (FILE *outf, affine_fn fn)
204 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
205 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
207 fprintf (outf, " + ");
208 print_generic_expr (outf, coef, TDF_SLIM);
209 fprintf (outf, " * x_%u", i);
213 /* Dumps the conflict function CF to the file OUTF. */
216 dump_conflict_function (FILE *outf, conflict_function *cf)
220 if (cf->n == NO_DEPENDENCE)
221 fprintf (outf, "no dependence\n");
222 else if (cf->n == NOT_KNOWN)
223 fprintf (outf, "not known\n");
226 for (i = 0; i < cf->n; i++)
229 dump_affine_function (outf, cf->fns[i]);
230 fprintf (outf, "]\n");
235 /* Dump function for a SUBSCRIPT structure. */
238 dump_subscript (FILE *outf, struct subscript *subscript)
240 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
242 fprintf (outf, "\n (subscript \n");
243 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
244 dump_conflict_function (outf, cf);
245 if (CF_NONTRIVIAL_P (cf))
247 tree last_iteration = SUB_LAST_CONFLICT (subscript);
248 fprintf (outf, " last_conflict: ");
249 print_generic_stmt (outf, last_iteration, 0);
252 cf = SUB_CONFLICTS_IN_B (subscript);
253 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
254 dump_conflict_function (outf, cf);
255 if (CF_NONTRIVIAL_P (cf))
257 tree last_iteration = SUB_LAST_CONFLICT (subscript);
258 fprintf (outf, " last_conflict: ");
259 print_generic_stmt (outf, last_iteration, 0);
262 fprintf (outf, " (Subscript distance: ");
263 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
264 fprintf (outf, " )\n");
265 fprintf (outf, " )\n");
268 /* Print the classic direction vector DIRV to OUTF. */
271 print_direction_vector (FILE *outf,
277 for (eq = 0; eq < length; eq++)
279 enum data_dependence_direction dir = dirv[eq];
284 fprintf (outf, " +");
287 fprintf (outf, " -");
290 fprintf (outf, " =");
292 case dir_positive_or_equal:
293 fprintf (outf, " +=");
295 case dir_positive_or_negative:
296 fprintf (outf, " +-");
298 case dir_negative_or_equal:
299 fprintf (outf, " -=");
302 fprintf (outf, " *");
305 fprintf (outf, "indep");
309 fprintf (outf, "\n");
312 /* Print a vector of direction vectors. */
315 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
321 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
322 print_direction_vector (outf, v, length);
325 /* Print a vector of distance vectors. */
328 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
334 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
335 print_lambda_vector (outf, v, length);
341 debug_data_dependence_relation (struct data_dependence_relation *ddr)
343 dump_data_dependence_relation (stderr, ddr);
346 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
349 dump_data_dependence_relation (FILE *outf,
350 struct data_dependence_relation *ddr)
352 struct data_reference *dra, *drb;
356 fprintf (outf, "(Data Dep: \n");
357 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
358 fprintf (outf, " (don't know)\n");
360 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
361 fprintf (outf, " (no dependence)\n");
363 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
368 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
370 fprintf (outf, " access_fn_A: ");
371 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
372 fprintf (outf, " access_fn_B: ");
373 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
374 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
377 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
378 fprintf (outf, " loop nest: (");
379 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
380 fprintf (outf, "%d ", loopi->num);
381 fprintf (outf, ")\n");
383 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
385 fprintf (outf, " distance_vector: ");
386 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
390 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
392 fprintf (outf, " direction_vector: ");
393 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
398 fprintf (outf, ")\n");
401 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
404 dump_data_dependence_direction (FILE *file,
405 enum data_dependence_direction dir)
421 case dir_positive_or_negative:
422 fprintf (file, "+-");
425 case dir_positive_or_equal:
426 fprintf (file, "+=");
429 case dir_negative_or_equal:
430 fprintf (file, "-=");
442 /* Dumps the distance and direction vectors in FILE. DDRS contains
443 the dependence relations, and VECT_SIZE is the size of the
444 dependence vectors, or in other words the number of loops in the
448 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
451 struct data_dependence_relation *ddr;
454 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
455 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
457 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
459 fprintf (file, "DISTANCE_V (");
460 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
461 fprintf (file, ")\n");
464 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
466 fprintf (file, "DIRECTION_V (");
467 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
468 fprintf (file, ")\n");
472 fprintf (file, "\n\n");
475 /* Dumps the data dependence relations DDRS in FILE. */
478 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
481 struct data_dependence_relation *ddr;
483 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
484 dump_data_dependence_relation (file, ddr);
486 fprintf (file, "\n\n");
489 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
490 will be ssizetype. */
493 split_constant_offset (tree exp, tree *var, tree *off)
495 tree type = TREE_TYPE (exp), otype;
502 otype = TREE_TYPE (exp);
503 code = TREE_CODE (exp);
508 *var = build_int_cst (type, 0);
509 *off = fold_convert (ssizetype, exp);
512 case POINTER_PLUS_EXPR:
517 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
518 split_constant_offset (TREE_OPERAND (exp, 1), &var1, &off1);
519 *var = fold_convert (type, fold_build2 (TREE_CODE (exp), otype,
521 *off = size_binop (code, off0, off1);
525 off1 = TREE_OPERAND (exp, 1);
526 if (TREE_CODE (off1) != INTEGER_CST)
529 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
530 *var = fold_convert (type, fold_build2 (MULT_EXPR, otype,
532 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, off1));
537 tree op, base, poffset;
538 HOST_WIDE_INT pbitsize, pbitpos;
539 enum machine_mode pmode;
540 int punsignedp, pvolatilep;
542 op = TREE_OPERAND (exp, 0);
543 if (!handled_component_p (op))
546 base = get_inner_reference (op, &pbitsize, &pbitpos, &poffset,
547 &pmode, &punsignedp, &pvolatilep, false);
549 if (pbitpos % BITS_PER_UNIT != 0)
551 base = build_fold_addr_expr (base);
552 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
556 split_constant_offset (poffset, &poffset, &off1);
557 off0 = size_binop (PLUS_EXPR, off0, off1);
558 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base),
560 fold_convert (TREE_TYPE (base), poffset));
563 *var = fold_convert (type, base);
570 tree def_stmt = SSA_NAME_DEF_STMT (exp);
571 if (TREE_CODE (def_stmt) == GIMPLE_MODIFY_STMT)
573 tree def_stmt_rhs = GIMPLE_STMT_OPERAND (def_stmt, 1);
575 if (!TREE_SIDE_EFFECTS (def_stmt_rhs)
576 && EXPR_P (def_stmt_rhs)
577 && !REFERENCE_CLASS_P (def_stmt_rhs)
578 && !get_call_expr_in (def_stmt_rhs))
580 split_constant_offset (def_stmt_rhs, &var0, &off0);
581 var0 = fold_convert (type, var0);
594 *off = ssize_int (0);
597 /* Returns the address ADDR of an object in a canonical shape (without nop
598 casts, and with type of pointer to the object). */
601 canonicalize_base_object_address (tree addr)
607 /* The base address may be obtained by casting from integer, in that case
609 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
612 if (TREE_CODE (addr) != ADDR_EXPR)
615 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
618 /* Analyzes the behavior of the memory reference DR in the innermost loop that
622 dr_analyze_innermost (struct data_reference *dr)
624 tree stmt = DR_STMT (dr);
625 struct loop *loop = loop_containing_stmt (stmt);
626 tree ref = DR_REF (dr);
627 HOST_WIDE_INT pbitsize, pbitpos;
629 enum machine_mode pmode;
630 int punsignedp, pvolatilep;
631 affine_iv base_iv, offset_iv;
632 tree init, dinit, step;
634 if (dump_file && (dump_flags & TDF_DETAILS))
635 fprintf (dump_file, "analyze_innermost: ");
637 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
638 &pmode, &punsignedp, &pvolatilep, false);
639 gcc_assert (base != NULL_TREE);
641 if (pbitpos % BITS_PER_UNIT != 0)
643 if (dump_file && (dump_flags & TDF_DETAILS))
644 fprintf (dump_file, "failed: bit offset alignment.\n");
648 base = build_fold_addr_expr (base);
649 if (!simple_iv (loop, stmt, base, &base_iv, false))
651 if (dump_file && (dump_flags & TDF_DETAILS))
652 fprintf (dump_file, "failed: evolution of base is not affine.\n");
657 offset_iv.base = ssize_int (0);
658 offset_iv.step = ssize_int (0);
660 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
662 if (dump_file && (dump_flags & TDF_DETAILS))
663 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
667 init = ssize_int (pbitpos / BITS_PER_UNIT);
668 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
669 init = size_binop (PLUS_EXPR, init, dinit);
670 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
671 init = size_binop (PLUS_EXPR, init, dinit);
673 step = size_binop (PLUS_EXPR,
674 fold_convert (ssizetype, base_iv.step),
675 fold_convert (ssizetype, offset_iv.step));
677 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
679 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
683 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
685 if (dump_file && (dump_flags & TDF_DETAILS))
686 fprintf (dump_file, "success.\n");
689 /* Determines the base object and the list of indices of memory reference
690 DR, analyzed in loop nest NEST. */
693 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
695 tree stmt = DR_STMT (dr);
696 struct loop *loop = loop_containing_stmt (stmt);
697 VEC (tree, heap) *access_fns = NULL;
698 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
699 tree base, off, access_fn;
701 while (handled_component_p (aref))
703 if (TREE_CODE (aref) == ARRAY_REF)
705 op = TREE_OPERAND (aref, 1);
706 access_fn = analyze_scalar_evolution (loop, op);
707 access_fn = resolve_mixers (nest, access_fn);
708 VEC_safe_push (tree, heap, access_fns, access_fn);
710 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
713 aref = TREE_OPERAND (aref, 0);
716 if (INDIRECT_REF_P (aref))
718 op = TREE_OPERAND (aref, 0);
719 access_fn = analyze_scalar_evolution (loop, op);
720 access_fn = resolve_mixers (nest, access_fn);
721 base = initial_condition (access_fn);
722 split_constant_offset (base, &base, &off);
723 access_fn = chrec_replace_initial_condition (access_fn,
724 fold_convert (TREE_TYPE (base), off));
726 TREE_OPERAND (aref, 0) = base;
727 VEC_safe_push (tree, heap, access_fns, access_fn);
730 DR_BASE_OBJECT (dr) = ref;
731 DR_ACCESS_FNS (dr) = access_fns;
734 /* Extracts the alias analysis information from the memory reference DR. */
737 dr_analyze_alias (struct data_reference *dr)
739 tree stmt = DR_STMT (dr);
740 tree ref = DR_REF (dr);
741 tree base = get_base_address (ref), addr, smt = NULL_TREE;
748 else if (INDIRECT_REF_P (base))
750 addr = TREE_OPERAND (base, 0);
751 if (TREE_CODE (addr) == SSA_NAME)
753 smt = symbol_mem_tag (SSA_NAME_VAR (addr));
754 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
758 DR_SYMBOL_TAG (dr) = smt;
759 if (smt && var_can_have_subvars (smt))
760 DR_SUBVARS (dr) = get_subvars_for_var (smt);
762 vops = BITMAP_ALLOC (NULL);
763 FOR_EACH_SSA_TREE_OPERAND (op, stmt, it, SSA_OP_VIRTUAL_USES)
765 bitmap_set_bit (vops, DECL_UID (SSA_NAME_VAR (op)));
771 /* Returns true if the address of DR is invariant. */
774 dr_address_invariant_p (struct data_reference *dr)
779 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
780 if (tree_contains_chrecs (idx, NULL))
786 /* Frees data reference DR. */
789 free_data_ref (data_reference_p dr)
791 BITMAP_FREE (DR_VOPS (dr));
792 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
796 /* Analyzes memory reference MEMREF accessed in STMT. The reference
797 is read if IS_READ is true, write otherwise. Returns the
798 data_reference description of MEMREF. NEST is the outermost loop of the
799 loop nest in that the reference should be analyzed. */
801 struct data_reference *
802 create_data_ref (struct loop *nest, tree memref, tree stmt, bool is_read)
804 struct data_reference *dr;
806 if (dump_file && (dump_flags & TDF_DETAILS))
808 fprintf (dump_file, "Creating dr for ");
809 print_generic_expr (dump_file, memref, TDF_SLIM);
810 fprintf (dump_file, "\n");
813 dr = XCNEW (struct data_reference);
815 DR_REF (dr) = memref;
816 DR_IS_READ (dr) = is_read;
818 dr_analyze_innermost (dr);
819 dr_analyze_indices (dr, nest);
820 dr_analyze_alias (dr);
822 if (dump_file && (dump_flags & TDF_DETAILS))
824 fprintf (dump_file, "\tbase_address: ");
825 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
826 fprintf (dump_file, "\n\toffset from base address: ");
827 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
828 fprintf (dump_file, "\n\tconstant offset from base address: ");
829 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
830 fprintf (dump_file, "\n\tstep: ");
831 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
832 fprintf (dump_file, "\n\taligned to: ");
833 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
834 fprintf (dump_file, "\n\tbase_object: ");
835 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
836 fprintf (dump_file, "\n\tsymbol tag: ");
837 print_generic_expr (dump_file, DR_SYMBOL_TAG (dr), TDF_SLIM);
838 fprintf (dump_file, "\n");
844 /* Returns true if FNA == FNB. */
847 affine_function_equal_p (affine_fn fna, affine_fn fnb)
849 unsigned i, n = VEC_length (tree, fna);
851 if (n != VEC_length (tree, fnb))
854 for (i = 0; i < n; i++)
855 if (!operand_equal_p (VEC_index (tree, fna, i),
856 VEC_index (tree, fnb, i), 0))
862 /* If all the functions in CF are the same, returns one of them,
863 otherwise returns NULL. */
866 common_affine_function (conflict_function *cf)
871 if (!CF_NONTRIVIAL_P (cf))
876 for (i = 1; i < cf->n; i++)
877 if (!affine_function_equal_p (comm, cf->fns[i]))
883 /* Returns the base of the affine function FN. */
886 affine_function_base (affine_fn fn)
888 return VEC_index (tree, fn, 0);
891 /* Returns true if FN is a constant. */
894 affine_function_constant_p (affine_fn fn)
899 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
900 if (!integer_zerop (coef))
906 /* Returns true if FN is the zero constant function. */
909 affine_function_zero_p (affine_fn fn)
911 return (integer_zerop (affine_function_base (fn))
912 && affine_function_constant_p (fn));
915 /* Applies operation OP on affine functions FNA and FNB, and returns the
919 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
925 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
927 n = VEC_length (tree, fna);
928 m = VEC_length (tree, fnb);
932 n = VEC_length (tree, fnb);
933 m = VEC_length (tree, fna);
936 ret = VEC_alloc (tree, heap, m);
937 for (i = 0; i < n; i++)
938 VEC_quick_push (tree, ret,
939 fold_build2 (op, integer_type_node,
940 VEC_index (tree, fna, i),
941 VEC_index (tree, fnb, i)));
943 for (; VEC_iterate (tree, fna, i, coef); i++)
944 VEC_quick_push (tree, ret,
945 fold_build2 (op, integer_type_node,
946 coef, integer_zero_node));
947 for (; VEC_iterate (tree, fnb, i, coef); i++)
948 VEC_quick_push (tree, ret,
949 fold_build2 (op, integer_type_node,
950 integer_zero_node, coef));
955 /* Returns the sum of affine functions FNA and FNB. */
958 affine_fn_plus (affine_fn fna, affine_fn fnb)
960 return affine_fn_op (PLUS_EXPR, fna, fnb);
963 /* Returns the difference of affine functions FNA and FNB. */
966 affine_fn_minus (affine_fn fna, affine_fn fnb)
968 return affine_fn_op (MINUS_EXPR, fna, fnb);
971 /* Frees affine function FN. */
974 affine_fn_free (affine_fn fn)
976 VEC_free (tree, heap, fn);
979 /* Determine for each subscript in the data dependence relation DDR
983 compute_subscript_distance (struct data_dependence_relation *ddr)
985 conflict_function *cf_a, *cf_b;
986 affine_fn fn_a, fn_b, diff;
988 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
992 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
994 struct subscript *subscript;
996 subscript = DDR_SUBSCRIPT (ddr, i);
997 cf_a = SUB_CONFLICTS_IN_A (subscript);
998 cf_b = SUB_CONFLICTS_IN_B (subscript);
1000 fn_a = common_affine_function (cf_a);
1001 fn_b = common_affine_function (cf_b);
1004 SUB_DISTANCE (subscript) = chrec_dont_know;
1007 diff = affine_fn_minus (fn_a, fn_b);
1009 if (affine_function_constant_p (diff))
1010 SUB_DISTANCE (subscript) = affine_function_base (diff);
1012 SUB_DISTANCE (subscript) = chrec_dont_know;
1014 affine_fn_free (diff);
1019 /* Returns the conflict function for "unknown". */
1021 static conflict_function *
1022 conflict_fn_not_known (void)
1024 conflict_function *fn = XCNEW (conflict_function);
1030 /* Returns the conflict function for "independent". */
1032 static conflict_function *
1033 conflict_fn_no_dependence (void)
1035 conflict_function *fn = XCNEW (conflict_function);
1036 fn->n = NO_DEPENDENCE;
1041 /* Returns true if the address of OBJ is invariant in LOOP. */
1044 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1046 while (handled_component_p (obj))
1048 if (TREE_CODE (obj) == ARRAY_REF)
1050 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1051 need to check the stride and the lower bound of the reference. */
1052 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1054 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1058 else if (TREE_CODE (obj) == COMPONENT_REF)
1060 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1064 obj = TREE_OPERAND (obj, 0);
1067 if (!INDIRECT_REF_P (obj))
1070 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1074 /* Returns true if A and B are accesses to different objects, or to different
1075 fields of the same object. */
1078 disjoint_objects_p (tree a, tree b)
1080 tree base_a, base_b;
1081 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1084 base_a = get_base_address (a);
1085 base_b = get_base_address (b);
1089 && base_a != base_b)
1092 if (!operand_equal_p (base_a, base_b, 0))
1095 /* Compare the component references of A and B. We must start from the inner
1096 ones, so record them to the vector first. */
1097 while (handled_component_p (a))
1099 VEC_safe_push (tree, heap, comp_a, a);
1100 a = TREE_OPERAND (a, 0);
1102 while (handled_component_p (b))
1104 VEC_safe_push (tree, heap, comp_b, b);
1105 b = TREE_OPERAND (b, 0);
1111 if (VEC_length (tree, comp_a) == 0
1112 || VEC_length (tree, comp_b) == 0)
1115 a = VEC_pop (tree, comp_a);
1116 b = VEC_pop (tree, comp_b);
1118 /* Real and imaginary part of a variable do not alias. */
1119 if ((TREE_CODE (a) == REALPART_EXPR
1120 && TREE_CODE (b) == IMAGPART_EXPR)
1121 || (TREE_CODE (a) == IMAGPART_EXPR
1122 && TREE_CODE (b) == REALPART_EXPR))
1128 if (TREE_CODE (a) != TREE_CODE (b))
1131 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1132 DR_BASE_OBJECT are always zero. */
1133 if (TREE_CODE (a) == ARRAY_REF)
1135 else if (TREE_CODE (a) == COMPONENT_REF)
1137 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1140 /* Different fields of unions may overlap. */
1141 base_a = TREE_OPERAND (a, 0);
1142 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1145 /* Different fields of structures cannot. */
1153 VEC_free (tree, heap, comp_a);
1154 VEC_free (tree, heap, comp_b);
1159 /* Returns false if we can prove that data references A and B do not alias,
1163 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1165 const_tree addr_a = DR_BASE_ADDRESS (a);
1166 const_tree addr_b = DR_BASE_ADDRESS (b);
1167 const_tree type_a, type_b;
1168 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1170 /* If the sets of virtual operands are disjoint, the memory references do not
1172 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
1175 /* If the accessed objects are disjoint, the memory references do not
1177 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1180 if (!addr_a || !addr_b)
1183 /* If the references are based on different static objects, they cannot alias
1184 (PTA should be able to disambiguate such accesses, but often it fails to,
1185 since currently we cannot distinguish between pointer and offset in pointer
1187 if (TREE_CODE (addr_a) == ADDR_EXPR
1188 && TREE_CODE (addr_b) == ADDR_EXPR)
1189 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1191 /* An instruction writing through a restricted pointer is "independent" of any
1192 instruction reading or writing through a different restricted pointer,
1193 in the same block/scope. */
1195 type_a = TREE_TYPE (addr_a);
1196 type_b = TREE_TYPE (addr_b);
1197 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1199 if (TREE_CODE (addr_a) == SSA_NAME)
1200 decl_a = SSA_NAME_VAR (addr_a);
1201 if (TREE_CODE (addr_b) == SSA_NAME)
1202 decl_b = SSA_NAME_VAR (addr_b);
1204 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1205 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1206 && decl_a && DECL_P (decl_a)
1207 && decl_b && DECL_P (decl_b)
1209 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1210 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1216 /* Initialize a data dependence relation between data accesses A and
1217 B. NB_LOOPS is the number of loops surrounding the references: the
1218 size of the classic distance/direction vectors. */
1220 static struct data_dependence_relation *
1221 initialize_data_dependence_relation (struct data_reference *a,
1222 struct data_reference *b,
1223 VEC (loop_p, heap) *loop_nest)
1225 struct data_dependence_relation *res;
1228 res = XNEW (struct data_dependence_relation);
1231 DDR_LOOP_NEST (res) = NULL;
1232 DDR_REVERSED_P (res) = false;
1233 DDR_SUBSCRIPTS (res) = NULL;
1234 DDR_DIR_VECTS (res) = NULL;
1235 DDR_DIST_VECTS (res) = NULL;
1237 if (a == NULL || b == NULL)
1239 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1243 /* If the data references do not alias, then they are independent. */
1244 if (!dr_may_alias_p (a, b))
1246 DDR_ARE_DEPENDENT (res) = chrec_known;
1250 /* If the references do not access the same object, we do not know
1251 whether they alias or not. */
1252 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1254 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1258 /* If the base of the object is not invariant in the loop nest, we cannot
1259 analyze it. TODO -- in fact, it would suffice to record that there may
1260 be arbitrary dependences in the loops where the base object varies. */
1261 if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1262 DR_BASE_OBJECT (a)))
1264 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1268 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1270 DDR_AFFINE_P (res) = true;
1271 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1272 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1273 DDR_LOOP_NEST (res) = loop_nest;
1274 DDR_INNER_LOOP (res) = 0;
1276 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1278 struct subscript *subscript;
1280 subscript = XNEW (struct subscript);
1281 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1282 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1283 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1284 SUB_DISTANCE (subscript) = chrec_dont_know;
1285 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1291 /* Frees memory used by the conflict function F. */
1294 free_conflict_function (conflict_function *f)
1298 if (CF_NONTRIVIAL_P (f))
1300 for (i = 0; i < f->n; i++)
1301 affine_fn_free (f->fns[i]);
1306 /* Frees memory used by SUBSCRIPTS. */
1309 free_subscripts (VEC (subscript_p, heap) *subscripts)
1314 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1316 free_conflict_function (s->conflicting_iterations_in_a);
1317 free_conflict_function (s->conflicting_iterations_in_b);
1319 VEC_free (subscript_p, heap, subscripts);
1322 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1326 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1329 if (dump_file && (dump_flags & TDF_DETAILS))
1331 fprintf (dump_file, "(dependence classified: ");
1332 print_generic_expr (dump_file, chrec, 0);
1333 fprintf (dump_file, ")\n");
1336 DDR_ARE_DEPENDENT (ddr) = chrec;
1337 free_subscripts (DDR_SUBSCRIPTS (ddr));
1338 DDR_SUBSCRIPTS (ddr) = NULL;
1341 /* The dependence relation DDR cannot be represented by a distance
1345 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1347 if (dump_file && (dump_flags & TDF_DETAILS))
1348 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1350 DDR_AFFINE_P (ddr) = false;
1355 /* This section contains the classic Banerjee tests. */
1357 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1358 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1361 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1363 return (evolution_function_is_constant_p (chrec_a)
1364 && evolution_function_is_constant_p (chrec_b));
1367 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1368 variable, i.e., if the SIV (Single Index Variable) test is true. */
1371 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1373 if ((evolution_function_is_constant_p (chrec_a)
1374 && evolution_function_is_univariate_p (chrec_b))
1375 || (evolution_function_is_constant_p (chrec_b)
1376 && evolution_function_is_univariate_p (chrec_a)))
1379 if (evolution_function_is_univariate_p (chrec_a)
1380 && evolution_function_is_univariate_p (chrec_b))
1382 switch (TREE_CODE (chrec_a))
1384 case POLYNOMIAL_CHREC:
1385 switch (TREE_CODE (chrec_b))
1387 case POLYNOMIAL_CHREC:
1388 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1403 /* Creates a conflict function with N dimensions. The affine functions
1404 in each dimension follow. */
1406 static conflict_function *
1407 conflict_fn (unsigned n, ...)
1410 conflict_function *ret = XCNEW (conflict_function);
1413 gcc_assert (0 < n && n <= MAX_DIM);
1417 for (i = 0; i < n; i++)
1418 ret->fns[i] = va_arg (ap, affine_fn);
1424 /* Returns constant affine function with value CST. */
1427 affine_fn_cst (tree cst)
1429 affine_fn fn = VEC_alloc (tree, heap, 1);
1430 VEC_quick_push (tree, fn, cst);
1434 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1437 affine_fn_univar (tree cst, unsigned dim, tree coef)
1439 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1442 gcc_assert (dim > 0);
1443 VEC_quick_push (tree, fn, cst);
1444 for (i = 1; i < dim; i++)
1445 VEC_quick_push (tree, fn, integer_zero_node);
1446 VEC_quick_push (tree, fn, coef);
1450 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1451 *OVERLAPS_B are initialized to the functions that describe the
1452 relation between the elements accessed twice by CHREC_A and
1453 CHREC_B. For k >= 0, the following property is verified:
1455 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1458 analyze_ziv_subscript (tree chrec_a,
1460 conflict_function **overlaps_a,
1461 conflict_function **overlaps_b,
1462 tree *last_conflicts)
1465 dependence_stats.num_ziv++;
1467 if (dump_file && (dump_flags & TDF_DETAILS))
1468 fprintf (dump_file, "(analyze_ziv_subscript \n");
1470 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1471 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1472 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
1474 switch (TREE_CODE (difference))
1477 if (integer_zerop (difference))
1479 /* The difference is equal to zero: the accessed index
1480 overlaps for each iteration in the loop. */
1481 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1482 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1483 *last_conflicts = chrec_dont_know;
1484 dependence_stats.num_ziv_dependent++;
1488 /* The accesses do not overlap. */
1489 *overlaps_a = conflict_fn_no_dependence ();
1490 *overlaps_b = conflict_fn_no_dependence ();
1491 *last_conflicts = integer_zero_node;
1492 dependence_stats.num_ziv_independent++;
1497 /* We're not sure whether the indexes overlap. For the moment,
1498 conservatively answer "don't know". */
1499 if (dump_file && (dump_flags & TDF_DETAILS))
1500 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1502 *overlaps_a = conflict_fn_not_known ();
1503 *overlaps_b = conflict_fn_not_known ();
1504 *last_conflicts = chrec_dont_know;
1505 dependence_stats.num_ziv_unimplemented++;
1509 if (dump_file && (dump_flags & TDF_DETAILS))
1510 fprintf (dump_file, ")\n");
1513 /* Sets NIT to the estimated number of executions of the statements in
1514 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1515 large as the number of iterations. If we have no reliable estimate,
1516 the function returns false, otherwise returns true. */
1519 estimated_loop_iterations (struct loop *loop, bool conservative,
1522 estimate_numbers_of_iterations_loop (loop);
1525 if (!loop->any_upper_bound)
1528 *nit = loop->nb_iterations_upper_bound;
1532 if (!loop->any_estimate)
1535 *nit = loop->nb_iterations_estimate;
1541 /* Similar to estimated_loop_iterations, but returns the estimate only
1542 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1543 on the number of iterations of LOOP could not be derived, returns -1. */
1546 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1549 HOST_WIDE_INT hwi_nit;
1551 if (!estimated_loop_iterations (loop, conservative, &nit))
1554 if (!double_int_fits_in_shwi_p (nit))
1556 hwi_nit = double_int_to_shwi (nit);
1558 return hwi_nit < 0 ? -1 : hwi_nit;
1561 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1562 and only if it fits to the int type. If this is not the case, or the
1563 estimate on the number of iterations of LOOP could not be derived, returns
1567 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1572 if (!estimated_loop_iterations (loop, conservative, &nit))
1573 return chrec_dont_know;
1575 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1576 if (!double_int_fits_to_tree_p (type, nit))
1577 return chrec_dont_know;
1579 return double_int_to_tree (type, nit);
1582 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1583 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1584 *OVERLAPS_B are initialized to the functions that describe the
1585 relation between the elements accessed twice by CHREC_A and
1586 CHREC_B. For k >= 0, the following property is verified:
1588 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1591 analyze_siv_subscript_cst_affine (tree chrec_a,
1593 conflict_function **overlaps_a,
1594 conflict_function **overlaps_b,
1595 tree *last_conflicts)
1597 bool value0, value1, value2;
1598 tree difference, tmp;
1600 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1601 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1602 difference = chrec_fold_minus
1603 (integer_type_node, initial_condition (chrec_b), chrec_a);
1605 if (!chrec_is_positive (initial_condition (difference), &value0))
1607 if (dump_file && (dump_flags & TDF_DETAILS))
1608 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1610 dependence_stats.num_siv_unimplemented++;
1611 *overlaps_a = conflict_fn_not_known ();
1612 *overlaps_b = conflict_fn_not_known ();
1613 *last_conflicts = chrec_dont_know;
1618 if (value0 == false)
1620 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1622 if (dump_file && (dump_flags & TDF_DETAILS))
1623 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1625 *overlaps_a = conflict_fn_not_known ();
1626 *overlaps_b = conflict_fn_not_known ();
1627 *last_conflicts = chrec_dont_know;
1628 dependence_stats.num_siv_unimplemented++;
1637 chrec_b = {10, +, 1}
1640 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1642 HOST_WIDE_INT numiter;
1643 struct loop *loop = get_chrec_loop (chrec_b);
1645 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1646 tmp = fold_build2 (EXACT_DIV_EXPR, integer_type_node,
1647 fold_build1 (ABS_EXPR,
1650 CHREC_RIGHT (chrec_b));
1651 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1652 *last_conflicts = integer_one_node;
1655 /* Perform weak-zero siv test to see if overlap is
1656 outside the loop bounds. */
1657 numiter = estimated_loop_iterations_int (loop, false);
1660 && compare_tree_int (tmp, numiter) > 0)
1662 free_conflict_function (*overlaps_a);
1663 free_conflict_function (*overlaps_b);
1664 *overlaps_a = conflict_fn_no_dependence ();
1665 *overlaps_b = conflict_fn_no_dependence ();
1666 *last_conflicts = integer_zero_node;
1667 dependence_stats.num_siv_independent++;
1670 dependence_stats.num_siv_dependent++;
1674 /* When the step does not divide the difference, there are
1678 *overlaps_a = conflict_fn_no_dependence ();
1679 *overlaps_b = conflict_fn_no_dependence ();
1680 *last_conflicts = integer_zero_node;
1681 dependence_stats.num_siv_independent++;
1690 chrec_b = {10, +, -1}
1692 In this case, chrec_a will not overlap with chrec_b. */
1693 *overlaps_a = conflict_fn_no_dependence ();
1694 *overlaps_b = conflict_fn_no_dependence ();
1695 *last_conflicts = integer_zero_node;
1696 dependence_stats.num_siv_independent++;
1703 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1705 if (dump_file && (dump_flags & TDF_DETAILS))
1706 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1708 *overlaps_a = conflict_fn_not_known ();
1709 *overlaps_b = conflict_fn_not_known ();
1710 *last_conflicts = chrec_dont_know;
1711 dependence_stats.num_siv_unimplemented++;
1716 if (value2 == false)
1720 chrec_b = {10, +, -1}
1722 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1724 HOST_WIDE_INT numiter;
1725 struct loop *loop = get_chrec_loop (chrec_b);
1727 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1728 tmp = fold_build2 (EXACT_DIV_EXPR,
1729 integer_type_node, difference,
1730 CHREC_RIGHT (chrec_b));
1731 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1732 *last_conflicts = integer_one_node;
1734 /* Perform weak-zero siv test to see if overlap is
1735 outside the loop bounds. */
1736 numiter = estimated_loop_iterations_int (loop, false);
1739 && compare_tree_int (tmp, numiter) > 0)
1741 free_conflict_function (*overlaps_a);
1742 free_conflict_function (*overlaps_b);
1743 *overlaps_a = conflict_fn_no_dependence ();
1744 *overlaps_b = conflict_fn_no_dependence ();
1745 *last_conflicts = integer_zero_node;
1746 dependence_stats.num_siv_independent++;
1749 dependence_stats.num_siv_dependent++;
1753 /* When the step does not divide the difference, there
1757 *overlaps_a = conflict_fn_no_dependence ();
1758 *overlaps_b = conflict_fn_no_dependence ();
1759 *last_conflicts = integer_zero_node;
1760 dependence_stats.num_siv_independent++;
1770 In this case, chrec_a will not overlap with chrec_b. */
1771 *overlaps_a = conflict_fn_no_dependence ();
1772 *overlaps_b = conflict_fn_no_dependence ();
1773 *last_conflicts = integer_zero_node;
1774 dependence_stats.num_siv_independent++;
1782 /* Helper recursive function for initializing the matrix A. Returns
1783 the initial value of CHREC. */
1786 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1790 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
1791 return int_cst_value (chrec);
1793 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1794 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1797 #define FLOOR_DIV(x,y) ((x) / (y))
1799 /* Solves the special case of the Diophantine equation:
1800 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1802 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1803 number of iterations that loops X and Y run. The overlaps will be
1804 constructed as evolutions in dimension DIM. */
1807 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1808 affine_fn *overlaps_a,
1809 affine_fn *overlaps_b,
1810 tree *last_conflicts, int dim)
1812 if (((step_a > 0 && step_b > 0)
1813 || (step_a < 0 && step_b < 0)))
1815 int step_overlaps_a, step_overlaps_b;
1816 int gcd_steps_a_b, last_conflict, tau2;
1818 gcd_steps_a_b = gcd (step_a, step_b);
1819 step_overlaps_a = step_b / gcd_steps_a_b;
1820 step_overlaps_b = step_a / gcd_steps_a_b;
1824 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1825 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1826 last_conflict = tau2;
1827 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1830 *last_conflicts = chrec_dont_know;
1832 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1833 build_int_cst (NULL_TREE,
1835 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1836 build_int_cst (NULL_TREE,
1842 *overlaps_a = affine_fn_cst (integer_zero_node);
1843 *overlaps_b = affine_fn_cst (integer_zero_node);
1844 *last_conflicts = integer_zero_node;
1848 /* Solves the special case of a Diophantine equation where CHREC_A is
1849 an affine bivariate function, and CHREC_B is an affine univariate
1850 function. For example,
1852 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1854 has the following overlapping functions:
1856 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1857 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1858 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1860 FORNOW: This is a specialized implementation for a case occurring in
1861 a common benchmark. Implement the general algorithm. */
1864 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1865 conflict_function **overlaps_a,
1866 conflict_function **overlaps_b,
1867 tree *last_conflicts)
1869 bool xz_p, yz_p, xyz_p;
1870 int step_x, step_y, step_z;
1871 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1872 affine_fn overlaps_a_xz, overlaps_b_xz;
1873 affine_fn overlaps_a_yz, overlaps_b_yz;
1874 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1875 affine_fn ova1, ova2, ovb;
1876 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1878 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1879 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1880 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1883 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1885 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1886 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1888 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1890 if (dump_file && (dump_flags & TDF_DETAILS))
1891 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1893 *overlaps_a = conflict_fn_not_known ();
1894 *overlaps_b = conflict_fn_not_known ();
1895 *last_conflicts = chrec_dont_know;
1899 niter = MIN (niter_x, niter_z);
1900 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1903 &last_conflicts_xz, 1);
1904 niter = MIN (niter_y, niter_z);
1905 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1908 &last_conflicts_yz, 2);
1909 niter = MIN (niter_x, niter_z);
1910 niter = MIN (niter_y, niter);
1911 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1914 &last_conflicts_xyz, 3);
1916 xz_p = !integer_zerop (last_conflicts_xz);
1917 yz_p = !integer_zerop (last_conflicts_yz);
1918 xyz_p = !integer_zerop (last_conflicts_xyz);
1920 if (xz_p || yz_p || xyz_p)
1922 ova1 = affine_fn_cst (integer_zero_node);
1923 ova2 = affine_fn_cst (integer_zero_node);
1924 ovb = affine_fn_cst (integer_zero_node);
1927 affine_fn t0 = ova1;
1930 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
1931 ovb = affine_fn_plus (ovb, overlaps_b_xz);
1932 affine_fn_free (t0);
1933 affine_fn_free (t2);
1934 *last_conflicts = last_conflicts_xz;
1938 affine_fn t0 = ova2;
1941 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
1942 ovb = affine_fn_plus (ovb, overlaps_b_yz);
1943 affine_fn_free (t0);
1944 affine_fn_free (t2);
1945 *last_conflicts = last_conflicts_yz;
1949 affine_fn t0 = ova1;
1950 affine_fn t2 = ova2;
1953 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
1954 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
1955 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
1956 affine_fn_free (t0);
1957 affine_fn_free (t2);
1958 affine_fn_free (t4);
1959 *last_conflicts = last_conflicts_xyz;
1961 *overlaps_a = conflict_fn (2, ova1, ova2);
1962 *overlaps_b = conflict_fn (1, ovb);
1966 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1967 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1968 *last_conflicts = integer_zero_node;
1971 affine_fn_free (overlaps_a_xz);
1972 affine_fn_free (overlaps_b_xz);
1973 affine_fn_free (overlaps_a_yz);
1974 affine_fn_free (overlaps_b_yz);
1975 affine_fn_free (overlaps_a_xyz);
1976 affine_fn_free (overlaps_b_xyz);
1979 /* Determines the overlapping elements due to accesses CHREC_A and
1980 CHREC_B, that are affine functions. This function cannot handle
1981 symbolic evolution functions, ie. when initial conditions are
1982 parameters, because it uses lambda matrices of integers. */
1985 analyze_subscript_affine_affine (tree chrec_a,
1987 conflict_function **overlaps_a,
1988 conflict_function **overlaps_b,
1989 tree *last_conflicts)
1991 unsigned nb_vars_a, nb_vars_b, dim;
1992 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
1993 lambda_matrix A, U, S;
1995 if (eq_evolutions_p (chrec_a, chrec_b))
1997 /* The accessed index overlaps for each iteration in the
1999 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2000 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2001 *last_conflicts = chrec_dont_know;
2004 if (dump_file && (dump_flags & TDF_DETAILS))
2005 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2007 /* For determining the initial intersection, we have to solve a
2008 Diophantine equation. This is the most time consuming part.
2010 For answering to the question: "Is there a dependence?" we have
2011 to prove that there exists a solution to the Diophantine
2012 equation, and that the solution is in the iteration domain,
2013 i.e. the solution is positive or zero, and that the solution
2014 happens before the upper bound loop.nb_iterations. Otherwise
2015 there is no dependence. This function outputs a description of
2016 the iterations that hold the intersections. */
2018 nb_vars_a = nb_vars_in_chrec (chrec_a);
2019 nb_vars_b = nb_vars_in_chrec (chrec_b);
2021 dim = nb_vars_a + nb_vars_b;
2022 U = lambda_matrix_new (dim, dim);
2023 A = lambda_matrix_new (dim, 1);
2024 S = lambda_matrix_new (dim, 1);
2026 init_a = initialize_matrix_A (A, chrec_a, 0, 1);
2027 init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
2028 gamma = init_b - init_a;
2030 /* Don't do all the hard work of solving the Diophantine equation
2031 when we already know the solution: for example,
2034 | gamma = 3 - 3 = 0.
2035 Then the first overlap occurs during the first iterations:
2036 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2040 if (nb_vars_a == 1 && nb_vars_b == 1)
2042 HOST_WIDE_INT step_a, step_b;
2043 HOST_WIDE_INT niter, niter_a, niter_b;
2046 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2048 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2050 niter = MIN (niter_a, niter_b);
2051 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2052 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2054 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2057 *overlaps_a = conflict_fn (1, ova);
2058 *overlaps_b = conflict_fn (1, ovb);
2061 else if (nb_vars_a == 2 && nb_vars_b == 1)
2062 compute_overlap_steps_for_affine_1_2
2063 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2065 else if (nb_vars_a == 1 && nb_vars_b == 2)
2066 compute_overlap_steps_for_affine_1_2
2067 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2071 if (dump_file && (dump_flags & TDF_DETAILS))
2072 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2073 *overlaps_a = conflict_fn_not_known ();
2074 *overlaps_b = conflict_fn_not_known ();
2075 *last_conflicts = chrec_dont_know;
2077 goto end_analyze_subs_aa;
2081 lambda_matrix_right_hermite (A, dim, 1, S, U);
2086 lambda_matrix_row_negate (U, dim, 0);
2088 gcd_alpha_beta = S[0][0];
2090 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2091 but that is a quite strange case. Instead of ICEing, answer
2093 if (gcd_alpha_beta == 0)
2095 *overlaps_a = conflict_fn_not_known ();
2096 *overlaps_b = conflict_fn_not_known ();
2097 *last_conflicts = chrec_dont_know;
2098 goto end_analyze_subs_aa;
2101 /* The classic "gcd-test". */
2102 if (!int_divides_p (gcd_alpha_beta, gamma))
2104 /* The "gcd-test" has determined that there is no integer
2105 solution, i.e. there is no dependence. */
2106 *overlaps_a = conflict_fn_no_dependence ();
2107 *overlaps_b = conflict_fn_no_dependence ();
2108 *last_conflicts = integer_zero_node;
2111 /* Both access functions are univariate. This includes SIV and MIV cases. */
2112 else if (nb_vars_a == 1 && nb_vars_b == 1)
2114 /* Both functions should have the same evolution sign. */
2115 if (((A[0][0] > 0 && -A[1][0] > 0)
2116 || (A[0][0] < 0 && -A[1][0] < 0)))
2118 /* The solutions are given by:
2120 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2123 For a given integer t. Using the following variables,
2125 | i0 = u11 * gamma / gcd_alpha_beta
2126 | j0 = u12 * gamma / gcd_alpha_beta
2133 | y0 = j0 + j1 * t. */
2134 HOST_WIDE_INT i0, j0, i1, j1;
2136 i0 = U[0][0] * gamma / gcd_alpha_beta;
2137 j0 = U[0][1] * gamma / gcd_alpha_beta;
2141 if ((i1 == 0 && i0 < 0)
2142 || (j1 == 0 && j0 < 0))
2144 /* There is no solution.
2145 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2146 falls in here, but for the moment we don't look at the
2147 upper bound of the iteration domain. */
2148 *overlaps_a = conflict_fn_no_dependence ();
2149 *overlaps_b = conflict_fn_no_dependence ();
2150 *last_conflicts = integer_zero_node;
2151 goto end_analyze_subs_aa;
2154 if (i1 > 0 && j1 > 0)
2156 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2157 (get_chrec_loop (chrec_a), false);
2158 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2159 (get_chrec_loop (chrec_b), false);
2160 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2162 /* (X0, Y0) is a solution of the Diophantine equation:
2163 "chrec_a (X0) = chrec_b (Y0)". */
2164 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2166 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2167 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2169 /* (X1, Y1) is the smallest positive solution of the eq
2170 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2171 first conflict occurs. */
2172 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2173 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2174 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2178 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2179 FLOOR_DIV (niter - j0, j1));
2180 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2182 /* If the overlap occurs outside of the bounds of the
2183 loop, there is no dependence. */
2184 if (x1 > niter || y1 > niter)
2186 *overlaps_a = conflict_fn_no_dependence ();
2187 *overlaps_b = conflict_fn_no_dependence ();
2188 *last_conflicts = integer_zero_node;
2189 goto end_analyze_subs_aa;
2192 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2195 *last_conflicts = chrec_dont_know;
2199 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2201 build_int_cst (NULL_TREE, i1)));
2204 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2206 build_int_cst (NULL_TREE, j1)));
2210 /* FIXME: For the moment, the upper bound of the
2211 iteration domain for i and j is not checked. */
2212 if (dump_file && (dump_flags & TDF_DETAILS))
2213 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2214 *overlaps_a = conflict_fn_not_known ();
2215 *overlaps_b = conflict_fn_not_known ();
2216 *last_conflicts = chrec_dont_know;
2221 if (dump_file && (dump_flags & TDF_DETAILS))
2222 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2223 *overlaps_a = conflict_fn_not_known ();
2224 *overlaps_b = conflict_fn_not_known ();
2225 *last_conflicts = chrec_dont_know;
2230 if (dump_file && (dump_flags & TDF_DETAILS))
2231 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2232 *overlaps_a = conflict_fn_not_known ();
2233 *overlaps_b = conflict_fn_not_known ();
2234 *last_conflicts = chrec_dont_know;
2237 end_analyze_subs_aa:
2238 if (dump_file && (dump_flags & TDF_DETAILS))
2240 fprintf (dump_file, " (overlaps_a = ");
2241 dump_conflict_function (dump_file, *overlaps_a);
2242 fprintf (dump_file, ")\n (overlaps_b = ");
2243 dump_conflict_function (dump_file, *overlaps_b);
2244 fprintf (dump_file, ")\n");
2245 fprintf (dump_file, ")\n");
2249 /* Returns true when analyze_subscript_affine_affine can be used for
2250 determining the dependence relation between chrec_a and chrec_b,
2251 that contain symbols. This function modifies chrec_a and chrec_b
2252 such that the analysis result is the same, and such that they don't
2253 contain symbols, and then can safely be passed to the analyzer.
2255 Example: The analysis of the following tuples of evolutions produce
2256 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2259 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2260 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2264 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2266 tree diff, type, left_a, left_b, right_b;
2268 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2269 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2270 /* FIXME: For the moment not handled. Might be refined later. */
2273 type = chrec_type (*chrec_a);
2274 left_a = CHREC_LEFT (*chrec_a);
2275 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL_TREE);
2276 diff = chrec_fold_minus (type, left_a, left_b);
2278 if (!evolution_function_is_constant_p (diff))
2281 if (dump_file && (dump_flags & TDF_DETAILS))
2282 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2284 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2285 diff, CHREC_RIGHT (*chrec_a));
2286 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL_TREE);
2287 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2288 build_int_cst (type, 0),
2293 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2294 *OVERLAPS_B are initialized to the functions that describe the
2295 relation between the elements accessed twice by CHREC_A and
2296 CHREC_B. For k >= 0, the following property is verified:
2298 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2301 analyze_siv_subscript (tree chrec_a,
2303 conflict_function **overlaps_a,
2304 conflict_function **overlaps_b,
2305 tree *last_conflicts)
2307 dependence_stats.num_siv++;
2309 if (dump_file && (dump_flags & TDF_DETAILS))
2310 fprintf (dump_file, "(analyze_siv_subscript \n");
2312 if (evolution_function_is_constant_p (chrec_a)
2313 && evolution_function_is_affine_p (chrec_b))
2314 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2315 overlaps_a, overlaps_b, last_conflicts);
2317 else if (evolution_function_is_affine_p (chrec_a)
2318 && evolution_function_is_constant_p (chrec_b))
2319 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2320 overlaps_b, overlaps_a, last_conflicts);
2322 else if (evolution_function_is_affine_p (chrec_a)
2323 && evolution_function_is_affine_p (chrec_b))
2325 if (!chrec_contains_symbols (chrec_a)
2326 && !chrec_contains_symbols (chrec_b))
2328 analyze_subscript_affine_affine (chrec_a, chrec_b,
2329 overlaps_a, overlaps_b,
2332 if (CF_NOT_KNOWN_P (*overlaps_a)
2333 || CF_NOT_KNOWN_P (*overlaps_b))
2334 dependence_stats.num_siv_unimplemented++;
2335 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2336 || CF_NO_DEPENDENCE_P (*overlaps_b))
2337 dependence_stats.num_siv_independent++;
2339 dependence_stats.num_siv_dependent++;
2341 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2344 analyze_subscript_affine_affine (chrec_a, chrec_b,
2345 overlaps_a, overlaps_b,
2348 if (CF_NOT_KNOWN_P (*overlaps_a)
2349 || CF_NOT_KNOWN_P (*overlaps_b))
2350 dependence_stats.num_siv_unimplemented++;
2351 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2352 || CF_NO_DEPENDENCE_P (*overlaps_b))
2353 dependence_stats.num_siv_independent++;
2355 dependence_stats.num_siv_dependent++;
2358 goto siv_subscript_dontknow;
2363 siv_subscript_dontknow:;
2364 if (dump_file && (dump_flags & TDF_DETAILS))
2365 fprintf (dump_file, "siv test failed: unimplemented.\n");
2366 *overlaps_a = conflict_fn_not_known ();
2367 *overlaps_b = conflict_fn_not_known ();
2368 *last_conflicts = chrec_dont_know;
2369 dependence_stats.num_siv_unimplemented++;
2372 if (dump_file && (dump_flags & TDF_DETAILS))
2373 fprintf (dump_file, ")\n");
2376 /* Returns false if we can prove that the greatest common divisor of the steps
2377 of CHREC does not divide CST, false otherwise. */
2380 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2382 HOST_WIDE_INT cd = 0, val;
2385 if (!host_integerp (cst, 0))
2387 val = tree_low_cst (cst, 0);
2389 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2391 step = CHREC_RIGHT (chrec);
2392 if (!host_integerp (step, 0))
2394 cd = gcd (cd, tree_low_cst (step, 0));
2395 chrec = CHREC_LEFT (chrec);
2398 return val % cd == 0;
2401 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2402 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2403 functions that describe the relation between the elements accessed
2404 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2407 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2410 analyze_miv_subscript (tree chrec_a,
2412 conflict_function **overlaps_a,
2413 conflict_function **overlaps_b,
2414 tree *last_conflicts,
2415 struct loop *loop_nest)
2417 /* FIXME: This is a MIV subscript, not yet handled.
2418 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2421 In the SIV test we had to solve a Diophantine equation with two
2422 variables. In the MIV case we have to solve a Diophantine
2423 equation with 2*n variables (if the subscript uses n IVs).
2426 dependence_stats.num_miv++;
2427 if (dump_file && (dump_flags & TDF_DETAILS))
2428 fprintf (dump_file, "(analyze_miv_subscript \n");
2430 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
2431 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
2432 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
2434 if (eq_evolutions_p (chrec_a, chrec_b))
2436 /* Access functions are the same: all the elements are accessed
2437 in the same order. */
2438 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2439 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2440 *last_conflicts = estimated_loop_iterations_tree
2441 (get_chrec_loop (chrec_a), true);
2442 dependence_stats.num_miv_dependent++;
2445 else if (evolution_function_is_constant_p (difference)
2446 /* For the moment, the following is verified:
2447 evolution_function_is_affine_multivariate_p (chrec_a,
2449 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2451 /* testsuite/.../ssa-chrec-33.c
2452 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2454 The difference is 1, and all the evolution steps are multiples
2455 of 2, consequently there are no overlapping elements. */
2456 *overlaps_a = conflict_fn_no_dependence ();
2457 *overlaps_b = conflict_fn_no_dependence ();
2458 *last_conflicts = integer_zero_node;
2459 dependence_stats.num_miv_independent++;
2462 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2463 && !chrec_contains_symbols (chrec_a)
2464 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2465 && !chrec_contains_symbols (chrec_b))
2467 /* testsuite/.../ssa-chrec-35.c
2468 {0, +, 1}_2 vs. {0, +, 1}_3
2469 the overlapping elements are respectively located at iterations:
2470 {0, +, 1}_x and {0, +, 1}_x,
2471 in other words, we have the equality:
2472 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2475 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2476 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2478 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2479 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2481 analyze_subscript_affine_affine (chrec_a, chrec_b,
2482 overlaps_a, overlaps_b, last_conflicts);
2484 if (CF_NOT_KNOWN_P (*overlaps_a)
2485 || CF_NOT_KNOWN_P (*overlaps_b))
2486 dependence_stats.num_miv_unimplemented++;
2487 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2488 || CF_NO_DEPENDENCE_P (*overlaps_b))
2489 dependence_stats.num_miv_independent++;
2491 dependence_stats.num_miv_dependent++;
2496 /* When the analysis is too difficult, answer "don't know". */
2497 if (dump_file && (dump_flags & TDF_DETAILS))
2498 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2500 *overlaps_a = conflict_fn_not_known ();
2501 *overlaps_b = conflict_fn_not_known ();
2502 *last_conflicts = chrec_dont_know;
2503 dependence_stats.num_miv_unimplemented++;
2506 if (dump_file && (dump_flags & TDF_DETAILS))
2507 fprintf (dump_file, ")\n");
2510 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2511 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2512 OVERLAP_ITERATIONS_B are initialized with two functions that
2513 describe the iterations that contain conflicting elements.
2515 Remark: For an integer k >= 0, the following equality is true:
2517 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2521 analyze_overlapping_iterations (tree chrec_a,
2523 conflict_function **overlap_iterations_a,
2524 conflict_function **overlap_iterations_b,
2525 tree *last_conflicts, struct loop *loop_nest)
2527 unsigned int lnn = loop_nest->num;
2529 dependence_stats.num_subscript_tests++;
2531 if (dump_file && (dump_flags & TDF_DETAILS))
2533 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2534 fprintf (dump_file, " (chrec_a = ");
2535 print_generic_expr (dump_file, chrec_a, 0);
2536 fprintf (dump_file, ")\n (chrec_b = ");
2537 print_generic_expr (dump_file, chrec_b, 0);
2538 fprintf (dump_file, ")\n");
2541 if (chrec_a == NULL_TREE
2542 || chrec_b == NULL_TREE
2543 || chrec_contains_undetermined (chrec_a)
2544 || chrec_contains_undetermined (chrec_b))
2546 dependence_stats.num_subscript_undetermined++;
2548 *overlap_iterations_a = conflict_fn_not_known ();
2549 *overlap_iterations_b = conflict_fn_not_known ();
2552 /* If they are the same chrec, and are affine, they overlap
2553 on every iteration. */
2554 else if (eq_evolutions_p (chrec_a, chrec_b)
2555 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2557 dependence_stats.num_same_subscript_function++;
2558 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2559 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2560 *last_conflicts = chrec_dont_know;
2563 /* If they aren't the same, and aren't affine, we can't do anything
2565 else if ((chrec_contains_symbols (chrec_a)
2566 || chrec_contains_symbols (chrec_b))
2567 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2568 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2570 dependence_stats.num_subscript_undetermined++;
2571 *overlap_iterations_a = conflict_fn_not_known ();
2572 *overlap_iterations_b = conflict_fn_not_known ();
2575 else if (ziv_subscript_p (chrec_a, chrec_b))
2576 analyze_ziv_subscript (chrec_a, chrec_b,
2577 overlap_iterations_a, overlap_iterations_b,
2580 else if (siv_subscript_p (chrec_a, chrec_b))
2581 analyze_siv_subscript (chrec_a, chrec_b,
2582 overlap_iterations_a, overlap_iterations_b,
2586 analyze_miv_subscript (chrec_a, chrec_b,
2587 overlap_iterations_a, overlap_iterations_b,
2588 last_conflicts, loop_nest);
2590 if (dump_file && (dump_flags & TDF_DETAILS))
2592 fprintf (dump_file, " (overlap_iterations_a = ");
2593 dump_conflict_function (dump_file, *overlap_iterations_a);
2594 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2595 dump_conflict_function (dump_file, *overlap_iterations_b);
2596 fprintf (dump_file, ")\n");
2597 fprintf (dump_file, ")\n");
2601 /* Helper function for uniquely inserting distance vectors. */
2604 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2609 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2610 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2613 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2616 /* Helper function for uniquely inserting direction vectors. */
2619 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2624 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2625 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2628 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2631 /* Add a distance of 1 on all the loops outer than INDEX. If we
2632 haven't yet determined a distance for this outer loop, push a new
2633 distance vector composed of the previous distance, and a distance
2634 of 1 for this outer loop. Example:
2642 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2643 save (0, 1), then we have to save (1, 0). */
2646 add_outer_distances (struct data_dependence_relation *ddr,
2647 lambda_vector dist_v, int index)
2649 /* For each outer loop where init_v is not set, the accesses are
2650 in dependence of distance 1 in the loop. */
2651 while (--index >= 0)
2653 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2654 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2656 save_dist_v (ddr, save_v);
2660 /* Return false when fail to represent the data dependence as a
2661 distance vector. INIT_B is set to true when a component has been
2662 added to the distance vector DIST_V. INDEX_CARRY is then set to
2663 the index in DIST_V that carries the dependence. */
2666 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2667 struct data_reference *ddr_a,
2668 struct data_reference *ddr_b,
2669 lambda_vector dist_v, bool *init_b,
2673 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2675 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2677 tree access_fn_a, access_fn_b;
2678 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2680 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2682 non_affine_dependence_relation (ddr);
2686 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2687 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2689 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2690 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2693 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2694 DDR_LOOP_NEST (ddr));
2695 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2696 DDR_LOOP_NEST (ddr));
2698 /* The dependence is carried by the outermost loop. Example:
2705 In this case, the dependence is carried by loop_1. */
2706 index = index_a < index_b ? index_a : index_b;
2707 *index_carry = MIN (index, *index_carry);
2709 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2711 non_affine_dependence_relation (ddr);
2715 dist = int_cst_value (SUB_DISTANCE (subscript));
2717 /* This is the subscript coupling test. If we have already
2718 recorded a distance for this loop (a distance coming from
2719 another subscript), it should be the same. For example,
2720 in the following code, there is no dependence:
2727 if (init_v[index] != 0 && dist_v[index] != dist)
2729 finalize_ddr_dependent (ddr, chrec_known);
2733 dist_v[index] = dist;
2737 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2739 /* This can be for example an affine vs. constant dependence
2740 (T[i] vs. T[3]) that is not an affine dependence and is
2741 not representable as a distance vector. */
2742 non_affine_dependence_relation (ddr);
2750 /* Return true when the DDR contains two data references that have the
2751 same access functions. */
2754 same_access_functions (const struct data_dependence_relation *ddr)
2758 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2759 if (!eq_evolutions_p (DR_ACCESS_FN (DDR_A (ddr), i),
2760 DR_ACCESS_FN (DDR_B (ddr), i)))
2766 /* Return true when the DDR contains only constant access functions. */
2769 constant_access_functions (const struct data_dependence_relation *ddr)
2773 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2774 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2775 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2782 /* Helper function for the case where DDR_A and DDR_B are the same
2783 multivariate access function. */
2786 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2789 tree c_1 = CHREC_LEFT (c_2);
2790 tree c_0 = CHREC_LEFT (c_1);
2791 lambda_vector dist_v;
2794 /* Polynomials with more than 2 variables are not handled yet. When
2795 the evolution steps are parameters, it is not possible to
2796 represent the dependence using classical distance vectors. */
2797 if (TREE_CODE (c_0) != INTEGER_CST
2798 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2799 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2801 DDR_AFFINE_P (ddr) = false;
2805 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2806 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2808 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2809 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2810 v1 = int_cst_value (CHREC_RIGHT (c_1));
2811 v2 = int_cst_value (CHREC_RIGHT (c_2));
2824 save_dist_v (ddr, dist_v);
2826 add_outer_distances (ddr, dist_v, x_1);
2829 /* Helper function for the case where DDR_A and DDR_B are the same
2830 access functions. */
2833 add_other_self_distances (struct data_dependence_relation *ddr)
2835 lambda_vector dist_v;
2837 int index_carry = DDR_NB_LOOPS (ddr);
2839 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2841 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2843 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2845 if (!evolution_function_is_univariate_p (access_fun))
2847 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2849 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2853 add_multivariate_self_dist (ddr, DR_ACCESS_FN (DDR_A (ddr), 0));
2857 index_carry = MIN (index_carry,
2858 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2859 DDR_LOOP_NEST (ddr)));
2863 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2864 add_outer_distances (ddr, dist_v, index_carry);
2868 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2870 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2872 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2873 save_dist_v (ddr, dist_v);
2876 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2877 is the case for example when access functions are the same and
2878 equal to a constant, as in:
2885 in which case the distance vectors are (0) and (1). */
2888 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2892 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2894 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2895 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2896 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2898 for (j = 0; j < ca->n; j++)
2899 if (affine_function_zero_p (ca->fns[j]))
2901 insert_innermost_unit_dist_vector (ddr);
2905 for (j = 0; j < cb->n; j++)
2906 if (affine_function_zero_p (cb->fns[j]))
2908 insert_innermost_unit_dist_vector (ddr);
2914 /* Compute the classic per loop distance vector. DDR is the data
2915 dependence relation to build a vector from. Return false when fail
2916 to represent the data dependence as a distance vector. */
2919 build_classic_dist_vector (struct data_dependence_relation *ddr,
2920 struct loop *loop_nest)
2922 bool init_b = false;
2923 int index_carry = DDR_NB_LOOPS (ddr);
2924 lambda_vector dist_v;
2926 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
2929 if (same_access_functions (ddr))
2931 /* Save the 0 vector. */
2932 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2933 save_dist_v (ddr, dist_v);
2935 if (constant_access_functions (ddr))
2936 add_distance_for_zero_overlaps (ddr);
2938 if (DDR_NB_LOOPS (ddr) > 1)
2939 add_other_self_distances (ddr);
2944 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2945 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
2946 dist_v, &init_b, &index_carry))
2949 /* Save the distance vector if we initialized one. */
2952 /* Verify a basic constraint: classic distance vectors should
2953 always be lexicographically positive.
2955 Data references are collected in the order of execution of
2956 the program, thus for the following loop
2958 | for (i = 1; i < 100; i++)
2959 | for (j = 1; j < 100; j++)
2961 | t = T[j+1][i-1]; // A
2962 | T[j][i] = t + 2; // B
2965 references are collected following the direction of the wind:
2966 A then B. The data dependence tests are performed also
2967 following this order, such that we're looking at the distance
2968 separating the elements accessed by A from the elements later
2969 accessed by B. But in this example, the distance returned by
2970 test_dep (A, B) is lexicographically negative (-1, 1), that
2971 means that the access A occurs later than B with respect to
2972 the outer loop, ie. we're actually looking upwind. In this
2973 case we solve test_dep (B, A) looking downwind to the
2974 lexicographically positive solution, that returns the
2975 distance vector (1, -1). */
2976 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
2978 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2979 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
2982 compute_subscript_distance (ddr);
2983 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
2984 save_v, &init_b, &index_carry))
2986 save_dist_v (ddr, save_v);
2987 DDR_REVERSED_P (ddr) = true;
2989 /* In this case there is a dependence forward for all the
2992 | for (k = 1; k < 100; k++)
2993 | for (i = 1; i < 100; i++)
2994 | for (j = 1; j < 100; j++)
2996 | t = T[j+1][i-1]; // A
2997 | T[j][i] = t + 2; // B
3005 if (DDR_NB_LOOPS (ddr) > 1)
3007 add_outer_distances (ddr, save_v, index_carry);
3008 add_outer_distances (ddr, dist_v, index_carry);
3013 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3014 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3016 if (DDR_NB_LOOPS (ddr) > 1)
3018 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3020 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3021 DDR_A (ddr), loop_nest))
3023 compute_subscript_distance (ddr);
3024 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3025 opposite_v, &init_b,
3029 save_dist_v (ddr, save_v);
3030 add_outer_distances (ddr, dist_v, index_carry);
3031 add_outer_distances (ddr, opposite_v, index_carry);
3034 save_dist_v (ddr, save_v);
3039 /* There is a distance of 1 on all the outer loops: Example:
3040 there is a dependence of distance 1 on loop_1 for the array A.
3046 add_outer_distances (ddr, dist_v,
3047 lambda_vector_first_nz (dist_v,
3048 DDR_NB_LOOPS (ddr), 0));
3051 if (dump_file && (dump_flags & TDF_DETAILS))
3055 fprintf (dump_file, "(build_classic_dist_vector\n");
3056 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3058 fprintf (dump_file, " dist_vector = (");
3059 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3060 DDR_NB_LOOPS (ddr));
3061 fprintf (dump_file, " )\n");
3063 fprintf (dump_file, ")\n");
3069 /* Return the direction for a given distance.
3070 FIXME: Computing dir this way is suboptimal, since dir can catch
3071 cases that dist is unable to represent. */
3073 static inline enum data_dependence_direction
3074 dir_from_dist (int dist)
3077 return dir_positive;
3079 return dir_negative;
3084 /* Compute the classic per loop direction vector. DDR is the data
3085 dependence relation to build a vector from. */
3088 build_classic_dir_vector (struct data_dependence_relation *ddr)
3091 lambda_vector dist_v;
3093 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3095 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3097 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3098 dir_v[j] = dir_from_dist (dist_v[j]);
3100 save_dir_v (ddr, dir_v);
3104 /* Helper function. Returns true when there is a dependence between
3105 data references DRA and DRB. */
3108 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3109 struct data_reference *dra,
3110 struct data_reference *drb,
3111 struct loop *loop_nest)
3114 tree last_conflicts;
3115 struct subscript *subscript;
3117 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3120 conflict_function *overlaps_a, *overlaps_b;
3122 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3123 DR_ACCESS_FN (drb, i),
3124 &overlaps_a, &overlaps_b,
3125 &last_conflicts, loop_nest);
3127 if (CF_NOT_KNOWN_P (overlaps_a)
3128 || CF_NOT_KNOWN_P (overlaps_b))
3130 finalize_ddr_dependent (ddr, chrec_dont_know);
3131 dependence_stats.num_dependence_undetermined++;
3132 free_conflict_function (overlaps_a);
3133 free_conflict_function (overlaps_b);
3137 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3138 || CF_NO_DEPENDENCE_P (overlaps_b))
3140 finalize_ddr_dependent (ddr, chrec_known);
3141 dependence_stats.num_dependence_independent++;
3142 free_conflict_function (overlaps_a);
3143 free_conflict_function (overlaps_b);
3149 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3150 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3151 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3158 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3161 subscript_dependence_tester (struct data_dependence_relation *ddr,
3162 struct loop *loop_nest)
3165 if (dump_file && (dump_flags & TDF_DETAILS))
3166 fprintf (dump_file, "(subscript_dependence_tester \n");
3168 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3169 dependence_stats.num_dependence_dependent++;
3171 compute_subscript_distance (ddr);
3172 if (build_classic_dist_vector (ddr, loop_nest))
3173 build_classic_dir_vector (ddr);
3175 if (dump_file && (dump_flags & TDF_DETAILS))
3176 fprintf (dump_file, ")\n");
3179 /* Returns true when all the access functions of A are affine or
3180 constant with respect to LOOP_NEST. */
3183 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3184 const struct loop *loop_nest)
3187 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3190 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3191 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3192 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3198 /* Initializes an equation for an OMEGA problem using the information
3199 contained in the ACCESS_FUN. Returns true when the operation
3202 PB is the omega constraint system.
3203 EQ is the number of the equation to be initialized.
3204 OFFSET is used for shifting the variables names in the constraints:
3205 a constrain is composed of 2 * the number of variables surrounding
3206 dependence accesses. OFFSET is set either to 0 for the first n variables,
3207 then it is set to n.
3208 ACCESS_FUN is expected to be an affine chrec. */
3211 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3212 unsigned int offset, tree access_fun,
3213 struct data_dependence_relation *ddr)
3215 switch (TREE_CODE (access_fun))
3217 case POLYNOMIAL_CHREC:
3219 tree left = CHREC_LEFT (access_fun);
3220 tree right = CHREC_RIGHT (access_fun);
3221 int var = CHREC_VARIABLE (access_fun);
3224 if (TREE_CODE (right) != INTEGER_CST)
3227 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3228 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3230 /* Compute the innermost loop index. */
3231 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3234 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3235 += int_cst_value (right);
3237 switch (TREE_CODE (left))
3239 case POLYNOMIAL_CHREC:
3240 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3243 pb->eqs[eq].coef[0] += int_cst_value (left);
3252 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3260 /* As explained in the comments preceding init_omega_for_ddr, we have
3261 to set up a system for each loop level, setting outer loops
3262 variation to zero, and current loop variation to positive or zero.
3263 Save each lexico positive distance vector. */
3266 omega_extract_distance_vectors (omega_pb pb,
3267 struct data_dependence_relation *ddr)
3271 struct loop *loopi, *loopj;
3272 enum omega_result res;
3274 /* Set a new problem for each loop in the nest. The basis is the
3275 problem that we have initialized until now. On top of this we
3276 add new constraints. */
3277 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3278 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3281 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3282 DDR_NB_LOOPS (ddr));
3284 omega_copy_problem (copy, pb);
3286 /* For all the outer loops "loop_j", add "dj = 0". */
3288 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3290 eq = omega_add_zero_eq (copy, omega_black);
3291 copy->eqs[eq].coef[j + 1] = 1;
3294 /* For "loop_i", add "0 <= di". */
3295 geq = omega_add_zero_geq (copy, omega_black);
3296 copy->geqs[geq].coef[i + 1] = 1;
3298 /* Reduce the constraint system, and test that the current
3299 problem is feasible. */
3300 res = omega_simplify_problem (copy);
3301 if (res == omega_false
3302 || res == omega_unknown
3303 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3306 for (eq = 0; eq < copy->num_subs; eq++)
3307 if (copy->subs[eq].key == (int) i + 1)
3309 dist = copy->subs[eq].coef[0];
3315 /* Reinitialize problem... */
3316 omega_copy_problem (copy, pb);
3318 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3320 eq = omega_add_zero_eq (copy, omega_black);
3321 copy->eqs[eq].coef[j + 1] = 1;
3324 /* ..., but this time "di = 1". */
3325 eq = omega_add_zero_eq (copy, omega_black);
3326 copy->eqs[eq].coef[i + 1] = 1;
3327 copy->eqs[eq].coef[0] = -1;
3329 res = omega_simplify_problem (copy);
3330 if (res == omega_false
3331 || res == omega_unknown
3332 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3335 for (eq = 0; eq < copy->num_subs; eq++)
3336 if (copy->subs[eq].key == (int) i + 1)
3338 dist = copy->subs[eq].coef[0];
3344 /* Save the lexicographically positive distance vector. */
3347 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3348 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3352 for (eq = 0; eq < copy->num_subs; eq++)
3353 if (copy->subs[eq].key > 0)
3355 dist = copy->subs[eq].coef[0];
3356 dist_v[copy->subs[eq].key - 1] = dist;
3359 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3360 dir_v[j] = dir_from_dist (dist_v[j]);
3362 save_dist_v (ddr, dist_v);
3363 save_dir_v (ddr, dir_v);
3367 omega_free_problem (copy);
3371 /* This is called for each subscript of a tuple of data references:
3372 insert an equality for representing the conflicts. */
3375 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3376 struct data_dependence_relation *ddr,
3377 omega_pb pb, bool *maybe_dependent)
3380 tree fun_a = chrec_convert (integer_type_node, access_fun_a, NULL_TREE);
3381 tree fun_b = chrec_convert (integer_type_node, access_fun_b, NULL_TREE);
3382 tree difference = chrec_fold_minus (integer_type_node, fun_a, fun_b);
3384 /* When the fun_a - fun_b is not constant, the dependence is not
3385 captured by the classic distance vector representation. */
3386 if (TREE_CODE (difference) != INTEGER_CST)
3390 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3392 /* There is no dependence. */
3393 *maybe_dependent = false;
3397 fun_b = chrec_fold_multiply (integer_type_node, fun_b,
3398 integer_minus_one_node);
3400 eq = omega_add_zero_eq (pb, omega_black);
3401 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3402 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3403 /* There is probably a dependence, but the system of
3404 constraints cannot be built: answer "don't know". */
3408 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3409 && !int_divides_p (lambda_vector_gcd
3410 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3411 2 * DDR_NB_LOOPS (ddr)),
3412 pb->eqs[eq].coef[0]))
3414 /* There is no dependence. */
3415 *maybe_dependent = false;
3422 /* Helper function, same as init_omega_for_ddr but specialized for
3423 data references A and B. */
3426 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3427 struct data_dependence_relation *ddr,
3428 omega_pb pb, bool *maybe_dependent)
3433 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3435 /* Insert an equality per subscript. */
3436 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3438 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3439 ddr, pb, maybe_dependent))
3441 else if (*maybe_dependent == false)
3443 /* There is no dependence. */
3444 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3449 /* Insert inequalities: constraints corresponding to the iteration
3450 domain, i.e. the loops surrounding the references "loop_x" and
3451 the distance variables "dx". The layout of the OMEGA
3452 representation is as follows:
3453 - coef[0] is the constant
3454 - coef[1..nb_loops] are the protected variables that will not be
3455 removed by the solver: the "dx"
3456 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3458 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3459 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3461 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3464 ineq = omega_add_zero_geq (pb, omega_black);
3465 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3467 /* 0 <= loop_x + dx */
3468 ineq = omega_add_zero_geq (pb, omega_black);
3469 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3470 pb->geqs[ineq].coef[i + 1] = 1;
3474 /* loop_x <= nb_iters */
3475 ineq = omega_add_zero_geq (pb, omega_black);
3476 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3477 pb->geqs[ineq].coef[0] = nbi;
3479 /* loop_x + dx <= nb_iters */
3480 ineq = omega_add_zero_geq (pb, omega_black);
3481 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3482 pb->geqs[ineq].coef[i + 1] = -1;
3483 pb->geqs[ineq].coef[0] = nbi;
3485 /* A step "dx" bigger than nb_iters is not feasible, so
3486 add "0 <= nb_iters + dx", */
3487 ineq = omega_add_zero_geq (pb, omega_black);
3488 pb->geqs[ineq].coef[i + 1] = 1;
3489 pb->geqs[ineq].coef[0] = nbi;
3490 /* and "dx <= nb_iters". */
3491 ineq = omega_add_zero_geq (pb, omega_black);
3492 pb->geqs[ineq].coef[i + 1] = -1;
3493 pb->geqs[ineq].coef[0] = nbi;
3497 omega_extract_distance_vectors (pb, ddr);
3502 /* Sets up the Omega dependence problem for the data dependence
3503 relation DDR. Returns false when the constraint system cannot be
3504 built, ie. when the test answers "don't know". Returns true
3505 otherwise, and when independence has been proved (using one of the
3506 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3507 set MAYBE_DEPENDENT to true.
3509 Example: for setting up the dependence system corresponding to the
3510 conflicting accesses
3515 | ... A[2*j, 2*(i + j)]
3519 the following constraints come from the iteration domain:
3526 where di, dj are the distance variables. The constraints
3527 representing the conflicting elements are:
3530 i + 1 = 2 * (i + di + j + dj)
3532 For asking that the resulting distance vector (di, dj) be
3533 lexicographically positive, we insert the constraint "di >= 0". If
3534 "di = 0" in the solution, we fix that component to zero, and we
3535 look at the inner loops: we set a new problem where all the outer
3536 loop distances are zero, and fix this inner component to be
3537 positive. When one of the components is positive, we save that
3538 distance, and set a new problem where the distance on this loop is
3539 zero, searching for other distances in the inner loops. Here is
3540 the classic example that illustrates that we have to set for each
3541 inner loop a new problem:
3549 we have to save two distances (1, 0) and (0, 1).
3551 Given two array references, refA and refB, we have to set the
3552 dependence problem twice, refA vs. refB and refB vs. refA, and we
3553 cannot do a single test, as refB might occur before refA in the
3554 inner loops, and the contrary when considering outer loops: ex.
3559 | T[{1,+,1}_2][{1,+,1}_1] // refA
3560 | T[{2,+,1}_2][{0,+,1}_1] // refB
3565 refB touches the elements in T before refA, and thus for the same
3566 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3567 but for successive loop_0 iterations, we have (1, -1, 1)
3569 The Omega solver expects the distance variables ("di" in the
3570 previous example) to come first in the constraint system (as
3571 variables to be protected, or "safe" variables), the constraint
3572 system is built using the following layout:
3574 "cst | distance vars | index vars".
3578 init_omega_for_ddr (struct data_dependence_relation *ddr,
3579 bool *maybe_dependent)
3584 *maybe_dependent = true;
3586 if (same_access_functions (ddr))
3589 lambda_vector dir_v;
3591 /* Save the 0 vector. */
3592 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3593 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3594 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3595 dir_v[j] = dir_equal;
3596 save_dir_v (ddr, dir_v);
3598 /* Save the dependences carried by outer loops. */
3599 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3600 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3602 omega_free_problem (pb);
3606 /* Omega expects the protected variables (those that have to be kept
3607 after elimination) to appear first in the constraint system.
3608 These variables are the distance variables. In the following
3609 initialization we declare NB_LOOPS safe variables, and the total
3610 number of variables for the constraint system is 2*NB_LOOPS. */
3611 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3612 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3614 omega_free_problem (pb);
3616 /* Stop computation if not decidable, or no dependence. */
3617 if (res == false || *maybe_dependent == false)
3620 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3621 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3623 omega_free_problem (pb);
3628 /* Return true when DDR contains the same information as that stored
3629 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3632 ddr_consistent_p (FILE *file,
3633 struct data_dependence_relation *ddr,
3634 VEC (lambda_vector, heap) *dist_vects,
3635 VEC (lambda_vector, heap) *dir_vects)
3639 /* If dump_file is set, output there. */
3640 if (dump_file && (dump_flags & TDF_DETAILS))
3643 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3645 lambda_vector b_dist_v;
3646 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3647 VEC_length (lambda_vector, dist_vects),
3648 DDR_NUM_DIST_VECTS (ddr));
3650 fprintf (file, "Banerjee dist vectors:\n");
3651 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3652 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3654 fprintf (file, "Omega dist vectors:\n");
3655 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3656 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3658 fprintf (file, "data dependence relation:\n");
3659 dump_data_dependence_relation (file, ddr);
3661 fprintf (file, ")\n");
3665 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3667 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3668 VEC_length (lambda_vector, dir_vects),
3669 DDR_NUM_DIR_VECTS (ddr));
3673 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3675 lambda_vector a_dist_v;
3676 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3678 /* Distance vectors are not ordered in the same way in the DDR
3679 and in the DIST_VECTS: search for a matching vector. */
3680 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3681 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3684 if (j == VEC_length (lambda_vector, dist_vects))
3686 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3687 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3688 fprintf (file, "not found in Omega dist vectors:\n");
3689 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3690 fprintf (file, "data dependence relation:\n");
3691 dump_data_dependence_relation (file, ddr);
3692 fprintf (file, ")\n");
3696 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3698 lambda_vector a_dir_v;
3699 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3701 /* Direction vectors are not ordered in the same way in the DDR
3702 and in the DIR_VECTS: search for a matching vector. */
3703 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3704 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3707 if (j == VEC_length (lambda_vector, dist_vects))
3709 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3710 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3711 fprintf (file, "not found in Omega dir vectors:\n");
3712 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3713 fprintf (file, "data dependence relation:\n");
3714 dump_data_dependence_relation (file, ddr);
3715 fprintf (file, ")\n");
3722 /* This computes the affine dependence relation between A and B with
3723 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3724 independence between two accesses, while CHREC_DONT_KNOW is used
3725 for representing the unknown relation.
3727 Note that it is possible to stop the computation of the dependence
3728 relation the first time we detect a CHREC_KNOWN element for a given
3732 compute_affine_dependence (struct data_dependence_relation *ddr,
3733 struct loop *loop_nest)
3735 struct data_reference *dra = DDR_A (ddr);
3736 struct data_reference *drb = DDR_B (ddr);
3738 if (dump_file && (dump_flags & TDF_DETAILS))
3740 fprintf (dump_file, "(compute_affine_dependence\n");
3741 fprintf (dump_file, " (stmt_a = \n");
3742 print_generic_expr (dump_file, DR_STMT (dra), 0);
3743 fprintf (dump_file, ")\n (stmt_b = \n");
3744 print_generic_expr (dump_file, DR_STMT (drb), 0);
3745 fprintf (dump_file, ")\n");
3748 /* Analyze only when the dependence relation is not yet known. */
3749 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3751 dependence_stats.num_dependence_tests++;
3753 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3754 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3756 if (flag_check_data_deps)
3758 /* Compute the dependences using the first algorithm. */
3759 subscript_dependence_tester (ddr, loop_nest);
3761 if (dump_file && (dump_flags & TDF_DETAILS))
3763 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3764 dump_data_dependence_relation (dump_file, ddr);
3767 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3769 bool maybe_dependent;
3770 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3772 /* Save the result of the first DD analyzer. */
3773 dist_vects = DDR_DIST_VECTS (ddr);
3774 dir_vects = DDR_DIR_VECTS (ddr);
3776 /* Reset the information. */
3777 DDR_DIST_VECTS (ddr) = NULL;
3778 DDR_DIR_VECTS (ddr) = NULL;
3780 /* Compute the same information using Omega. */
3781 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3782 goto csys_dont_know;
3784 if (dump_file && (dump_flags & TDF_DETAILS))
3786 fprintf (dump_file, "Omega Analyzer\n");
3787 dump_data_dependence_relation (dump_file, ddr);
3790 /* Check that we get the same information. */
3791 if (maybe_dependent)
3792 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3797 subscript_dependence_tester (ddr, loop_nest);
3800 /* As a last case, if the dependence cannot be determined, or if
3801 the dependence is considered too difficult to determine, answer
3806 dependence_stats.num_dependence_undetermined++;
3808 if (dump_file && (dump_flags & TDF_DETAILS))
3810 fprintf (dump_file, "Data ref a:\n");
3811 dump_data_reference (dump_file, dra);
3812 fprintf (dump_file, "Data ref b:\n");
3813 dump_data_reference (dump_file, drb);
3814 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3816 finalize_ddr_dependent (ddr, chrec_dont_know);
3820 if (dump_file && (dump_flags & TDF_DETAILS))
3821 fprintf (dump_file, ")\n");
3824 /* This computes the dependence relation for the same data
3825 reference into DDR. */
3828 compute_self_dependence (struct data_dependence_relation *ddr)
3831 struct subscript *subscript;
3833 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3836 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3839 /* The accessed index overlaps for each iteration. */
3840 SUB_CONFLICTS_IN_A (subscript)
3841 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3842 SUB_CONFLICTS_IN_B (subscript)
3843 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3844 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3847 /* The distance vector is the zero vector. */
3848 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3849 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3852 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3853 the data references in DATAREFS, in the LOOP_NEST. When
3854 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3858 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3859 VEC (ddr_p, heap) **dependence_relations,
3860 VEC (loop_p, heap) *loop_nest,
3861 bool compute_self_and_rr)
3863 struct data_dependence_relation *ddr;
3864 struct data_reference *a, *b;
3867 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3868 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3869 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
3871 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3872 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3873 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
3876 if (compute_self_and_rr)
3877 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3879 ddr = initialize_data_dependence_relation (a, a, loop_nest);
3880 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3881 compute_self_dependence (ddr);
3885 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3886 true if STMT clobbers memory, false otherwise. */
3889 get_references_in_stmt (tree stmt, VEC (data_ref_loc, heap) **references)
3891 bool clobbers_memory = false;
3893 tree *op0, *op1, call;
3897 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
3898 Calls have side-effects, except those to const or pure
3900 call = get_call_expr_in (stmt);
3902 && !(call_expr_flags (call) & (ECF_CONST | ECF_PURE)))
3903 || (TREE_CODE (stmt) == ASM_EXPR
3904 && ASM_VOLATILE_P (stmt)))
3905 clobbers_memory = true;
3907 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
3908 return clobbers_memory;
3910 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
3912 op0 = &GIMPLE_STMT_OPERAND (stmt, 0);
3913 op1 = &GIMPLE_STMT_OPERAND (stmt, 1);
3916 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
3918 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3920 ref->is_read = true;
3924 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
3926 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3928 ref->is_read = false;
3934 unsigned i, n = call_expr_nargs (call);
3936 for (i = 0; i < n; i++)
3938 op0 = &CALL_EXPR_ARG (call, i);
3941 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
3943 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3945 ref->is_read = true;
3950 return clobbers_memory;
3953 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
3954 reference, returns false, otherwise returns true. NEST is the outermost
3955 loop of the loop nest in that the references should be analyzed. */
3958 find_data_references_in_stmt (struct loop *nest, tree stmt,
3959 VEC (data_reference_p, heap) **datarefs)
3962 VEC (data_ref_loc, heap) *references;
3965 data_reference_p dr;
3967 if (get_references_in_stmt (stmt, &references))
3969 VEC_free (data_ref_loc, heap, references);
3973 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
3975 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
3976 gcc_assert (dr != NULL);
3978 /* FIXME -- data dependence analysis does not work correctly for objects with
3979 invariant addresses. Let us fail here until the problem is fixed. */
3980 if (dr_address_invariant_p (dr))
3983 if (dump_file && (dump_flags & TDF_DETAILS))
3984 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
3989 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
3991 VEC_free (data_ref_loc, heap, references);
3995 /* Search the data references in LOOP, and record the information into
3996 DATAREFS. Returns chrec_dont_know when failing to analyze a
3997 difficult case, returns NULL_TREE otherwise.
3999 TODO: This function should be made smarter so that it can handle address
4000 arithmetic as if they were array accesses, etc. */
4003 find_data_references_in_loop (struct loop *loop,
4004 VEC (data_reference_p, heap) **datarefs)
4006 basic_block bb, *bbs;
4008 block_stmt_iterator bsi;
4010 bbs = get_loop_body_in_dom_order (loop);
4012 for (i = 0; i < loop->num_nodes; i++)
4016 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4018 tree stmt = bsi_stmt (bsi);
4020 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4022 struct data_reference *res;
4023 res = XCNEW (struct data_reference);
4024 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4027 return chrec_dont_know;
4036 /* Recursive helper function. */
4039 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4041 /* Inner loops of the nest should not contain siblings. Example:
4042 when there are two consecutive loops,
4053 the dependence relation cannot be captured by the distance
4058 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4060 return find_loop_nest_1 (loop->inner, loop_nest);
4064 /* Return false when the LOOP is not well nested. Otherwise return
4065 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4066 contain the loops from the outermost to the innermost, as they will
4067 appear in the classic distance vector. */
4070 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4072 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4074 return find_loop_nest_1 (loop->inner, loop_nest);
4078 /* Given a loop nest LOOP, the following vectors are returned:
4079 DATAREFS is initialized to all the array elements contained in this loop,
4080 DEPENDENCE_RELATIONS contains the relations between the data references.
4081 Compute read-read and self relations if
4082 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4085 compute_data_dependences_for_loop (struct loop *loop,
4086 bool compute_self_and_read_read_dependences,
4087 VEC (data_reference_p, heap) **datarefs,
4088 VEC (ddr_p, heap) **dependence_relations)
4090 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4092 memset (&dependence_stats, 0, sizeof (dependence_stats));
4094 /* If the loop nest is not well formed, or one of the data references
4095 is not computable, give up without spending time to compute other
4098 || !find_loop_nest (loop, &vloops)
4099 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4101 struct data_dependence_relation *ddr;
4103 /* Insert a single relation into dependence_relations:
4105 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4106 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4109 compute_all_dependences (*datarefs, dependence_relations, vloops,
4110 compute_self_and_read_read_dependences);
4112 if (dump_file && (dump_flags & TDF_STATS))
4114 fprintf (dump_file, "Dependence tester statistics:\n");
4116 fprintf (dump_file, "Number of dependence tests: %d\n",
4117 dependence_stats.num_dependence_tests);
4118 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4119 dependence_stats.num_dependence_dependent);
4120 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4121 dependence_stats.num_dependence_independent);
4122 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4123 dependence_stats.num_dependence_undetermined);
4125 fprintf (dump_file, "Number of subscript tests: %d\n",
4126 dependence_stats.num_subscript_tests);
4127 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4128 dependence_stats.num_subscript_undetermined);
4129 fprintf (dump_file, "Number of same subscript function: %d\n",
4130 dependence_stats.num_same_subscript_function);
4132 fprintf (dump_file, "Number of ziv tests: %d\n",
4133 dependence_stats.num_ziv);
4134 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4135 dependence_stats.num_ziv_dependent);
4136 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4137 dependence_stats.num_ziv_independent);
4138 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4139 dependence_stats.num_ziv_unimplemented);
4141 fprintf (dump_file, "Number of siv tests: %d\n",
4142 dependence_stats.num_siv);
4143 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4144 dependence_stats.num_siv_dependent);
4145 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4146 dependence_stats.num_siv_independent);
4147 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4148 dependence_stats.num_siv_unimplemented);
4150 fprintf (dump_file, "Number of miv tests: %d\n",
4151 dependence_stats.num_miv);
4152 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4153 dependence_stats.num_miv_dependent);
4154 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4155 dependence_stats.num_miv_independent);
4156 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4157 dependence_stats.num_miv_unimplemented);
4161 /* Entry point (for testing only). Analyze all the data references
4162 and the dependence relations in LOOP.
4164 The data references are computed first.
4166 A relation on these nodes is represented by a complete graph. Some
4167 of the relations could be of no interest, thus the relations can be
4170 In the following function we compute all the relations. This is
4171 just a first implementation that is here for:
4172 - for showing how to ask for the dependence relations,
4173 - for the debugging the whole dependence graph,
4174 - for the dejagnu testcases and maintenance.
4176 It is possible to ask only for a part of the graph, avoiding to
4177 compute the whole dependence graph. The computed dependences are
4178 stored in a knowledge base (KB) such that later queries don't
4179 recompute the same information. The implementation of this KB is
4180 transparent to the optimizer, and thus the KB can be changed with a
4181 more efficient implementation, or the KB could be disabled. */
4183 analyze_all_data_dependences (struct loop *loop)
4186 int nb_data_refs = 10;
4187 VEC (data_reference_p, heap) *datarefs =
4188 VEC_alloc (data_reference_p, heap, nb_data_refs);
4189 VEC (ddr_p, heap) *dependence_relations =
4190 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4192 /* Compute DDs on the whole function. */
4193 compute_data_dependences_for_loop (loop, false, &datarefs,
4194 &dependence_relations);
4198 dump_data_dependence_relations (dump_file, dependence_relations);
4199 fprintf (dump_file, "\n\n");
4201 if (dump_flags & TDF_DETAILS)
4202 dump_dist_dir_vectors (dump_file, dependence_relations);
4204 if (dump_flags & TDF_STATS)
4206 unsigned nb_top_relations = 0;
4207 unsigned nb_bot_relations = 0;
4208 unsigned nb_basename_differ = 0;
4209 unsigned nb_chrec_relations = 0;
4210 struct data_dependence_relation *ddr;
4212 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4214 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4217 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4219 struct data_reference *a = DDR_A (ddr);
4220 struct data_reference *b = DDR_B (ddr);
4222 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
4223 nb_basename_differ++;
4229 nb_chrec_relations++;
4232 gather_stats_on_scev_database ();
4236 free_dependence_relations (dependence_relations);
4237 free_data_refs (datarefs);
4240 /* Computes all the data dependences and check that the results of
4241 several analyzers are the same. */
4244 tree_check_data_deps (void)
4247 struct loop *loop_nest;
4249 FOR_EACH_LOOP (li, loop_nest, 0)
4250 analyze_all_data_dependences (loop_nest);
4253 /* Free the memory used by a data dependence relation DDR. */
4256 free_dependence_relation (struct data_dependence_relation *ddr)
4261 if (DDR_SUBSCRIPTS (ddr))
4262 free_subscripts (DDR_SUBSCRIPTS (ddr));
4263 if (DDR_DIST_VECTS (ddr))
4264 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4265 if (DDR_DIR_VECTS (ddr))
4266 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4271 /* Free the memory used by the data dependence relations from
4272 DEPENDENCE_RELATIONS. */
4275 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4278 struct data_dependence_relation *ddr;
4279 VEC (loop_p, heap) *loop_nest = NULL;
4281 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4285 if (loop_nest == NULL)
4286 loop_nest = DDR_LOOP_NEST (ddr);
4288 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4289 || DDR_LOOP_NEST (ddr) == loop_nest);
4290 free_dependence_relation (ddr);
4294 VEC_free (loop_p, heap, loop_nest);
4295 VEC_free (ddr_p, heap, dependence_relations);
4298 /* Free the memory used by the data references from DATAREFS. */
4301 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4304 struct data_reference *dr;
4306 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4308 VEC_free (data_reference_p, heap, datarefs);
4313 /* Returns the index of STMT in RDG. */
4316 find_vertex_for_stmt (const struct graph *rdg, const_tree stmt)
4320 for (i = 0; i < rdg->n_vertices; i++)
4321 if (RDGV_STMT (&(rdg->vertices[i])) == stmt)
4328 /* Creates an edge in RDG for each distance vector from DDR. */
4331 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4334 data_reference_p dra;
4335 data_reference_p drb;
4336 struct graph_edge *e;
4338 if (DDR_REVERSED_P (ddr))
4349 va = find_vertex_for_stmt (rdg, DR_STMT (dra));
4350 vb = find_vertex_for_stmt (rdg, DR_STMT (drb));
4352 e = add_edge (rdg, va, vb);
4353 e->data = XNEW (struct rdg_edge);
4355 /* Determines the type of the data dependence. */
4356 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4357 RDGE_TYPE (e) = input_dd;
4358 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4359 RDGE_TYPE (e) = output_dd;
4360 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4361 RDGE_TYPE (e) = flow_dd;
4362 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4363 RDGE_TYPE (e) = anti_dd;
4366 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4367 the index of DEF in RDG. */
4370 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4372 use_operand_p imm_use_p;
4373 imm_use_iterator iterator;
4375 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4377 int use = find_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4378 struct graph_edge *e = add_edge (rdg, idef, use);
4380 e->data = XNEW (struct rdg_edge);
4381 RDGE_TYPE (e) = flow_dd;
4385 /* Creates the edges of the reduced dependence graph RDG. */
4388 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4391 struct data_dependence_relation *ddr;
4392 def_operand_p def_p;
4395 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4396 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4397 create_rdg_edge_for_ddr (rdg, ddr);
4399 for (i = 0; i < rdg->n_vertices; i++)
4400 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDGV_STMT (&(rdg->vertices[i])),
4401 iter, SSA_OP_ALL_DEFS)
4402 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4405 /* Build the vertices of the reduced dependence graph RDG. */
4408 create_rdg_vertices (struct graph *rdg, VEC (tree, heap) *stmts)
4413 for (i = 0; VEC_iterate (tree, stmts, i, s); i++)
4415 struct vertex *v = &(rdg->vertices[i]);
4417 v->data = XNEW (struct rdg_vertex);
4422 /* Initialize STMTS with all the statements and PHI nodes of LOOP. */
4425 stmts_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
4428 basic_block *bbs = get_loop_body_in_dom_order (loop);
4430 for (i = 0; i < loop->num_nodes; i++)
4433 basic_block bb = bbs[i];
4434 block_stmt_iterator bsi;
4436 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
4437 VEC_safe_push (tree, heap, *stmts, phi);
4439 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4440 VEC_safe_push (tree, heap, *stmts, bsi_stmt (bsi));
4446 /* Returns true when all the dependences are computable. */
4449 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4454 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4455 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4461 /* Build a Reduced Dependence Graph with one vertex per statement of the
4462 loop nest and one edge per data dependence or scalar dependence. */
4465 build_rdg (struct loop *loop)
4467 int nb_data_refs = 10;
4468 struct graph *rdg = NULL;
4469 VEC (ddr_p, heap) *dependence_relations;
4470 VEC (data_reference_p, heap) *datarefs;
4471 VEC (tree, heap) *stmts = VEC_alloc (tree, heap, 10);
4473 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4474 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4475 compute_data_dependences_for_loop (loop,
4478 &dependence_relations);
4480 if (!known_dependences_p (dependence_relations))
4483 stmts_from_loop (loop, &stmts);
4484 rdg = new_graph (VEC_length (tree, stmts));
4485 create_rdg_vertices (rdg, stmts);
4486 create_rdg_edges (rdg, dependence_relations);
4489 free_dependence_relations (dependence_relations);
4490 free_data_refs (datarefs);
4491 VEC_free (tree, heap, stmts);