1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass walks a given loop structure searching for array
22 references. The information about the array accesses is recorded
23 in DATA_REFERENCE structures.
25 The basic test for determining the dependences is:
26 given two access functions chrec1 and chrec2 to a same array, and
27 x and y two vectors from the iteration domain, the same element of
28 the array is accessed twice at iterations x and y if and only if:
29 | chrec1 (x) == chrec2 (y).
31 The goals of this analysis are:
33 - to determine the independence: the relation between two
34 independent accesses is qualified with the chrec_known (this
35 information allows a loop parallelization),
37 - when two data references access the same data, to qualify the
38 dependence relation with classic dependence representations:
42 - loop carried level dependence
43 - polyhedron dependence
44 or with the chains of recurrences based representation,
46 - to define a knowledge base for storing the data dependence
49 - to define an interface to access this data.
54 - subscript: given two array accesses a subscript is the tuple
55 composed of the access functions for a given dimension. Example:
56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57 (f1, g1), (f2, g2), (f3, g3).
59 - Diophantine equation: an equation whose coefficients and
60 solutions are integer constants, for example the equation
62 has an integer solution x = 1 and y = -1.
66 - "Advanced Compilation for High Performance Computing" by Randy
67 Allen and Ken Kennedy.
68 http://citeseer.ist.psu.edu/goff91practical.html
70 - "Loop Transformations for Restructuring Compilers - The Foundations"
78 #include "coretypes.h"
83 /* These RTL headers are needed for basic-block.h. */
85 #include "basic-block.h"
86 #include "diagnostic.h"
87 #include "tree-flow.h"
88 #include "tree-dump.h"
91 #include "tree-data-ref.h"
92 #include "tree-scalar-evolution.h"
93 #include "tree-pass.h"
94 #include "langhooks.h"
96 static struct datadep_stats
98 int num_dependence_tests;
99 int num_dependence_dependent;
100 int num_dependence_independent;
101 int num_dependence_undetermined;
103 int num_subscript_tests;
104 int num_subscript_undetermined;
105 int num_same_subscript_function;
108 int num_ziv_independent;
109 int num_ziv_dependent;
110 int num_ziv_unimplemented;
113 int num_siv_independent;
114 int num_siv_dependent;
115 int num_siv_unimplemented;
118 int num_miv_independent;
119 int num_miv_dependent;
120 int num_miv_unimplemented;
123 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
124 struct data_reference *,
125 struct data_reference *,
127 /* Returns true iff A divides B. */
130 tree_fold_divides_p (const_tree a, const_tree b)
132 gcc_assert (TREE_CODE (a) == INTEGER_CST);
133 gcc_assert (TREE_CODE (b) == INTEGER_CST);
134 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
137 /* Returns true iff A divides B. */
140 int_divides_p (int a, int b)
142 return ((b % a) == 0);
147 /* Dump into FILE all the data references from DATAREFS. */
150 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
153 struct data_reference *dr;
155 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
156 dump_data_reference (file, dr);
159 /* Dump to STDERR all the dependence relations from DDRS. */
162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 dump_data_dependence_relations (stderr, ddrs);
167 /* Dump into FILE all the dependence relations from DDRS. */
170 dump_data_dependence_relations (FILE *file,
171 VEC (ddr_p, heap) *ddrs)
174 struct data_dependence_relation *ddr;
176 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
177 dump_data_dependence_relation (file, ddr);
180 /* Dump function for a DATA_REFERENCE structure. */
183 dump_data_reference (FILE *outf,
184 struct data_reference *dr)
188 fprintf (outf, "(Data Ref: \n stmt: ");
189 print_generic_stmt (outf, DR_STMT (dr), 0);
190 fprintf (outf, " ref: ");
191 print_generic_stmt (outf, DR_REF (dr), 0);
192 fprintf (outf, " base_object: ");
193 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
195 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
197 fprintf (outf, " Access function %d: ", i);
198 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
200 fprintf (outf, ")\n");
203 /* Dumps the affine function described by FN to the file OUTF. */
206 dump_affine_function (FILE *outf, affine_fn fn)
211 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
212 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
214 fprintf (outf, " + ");
215 print_generic_expr (outf, coef, TDF_SLIM);
216 fprintf (outf, " * x_%u", i);
220 /* Dumps the conflict function CF to the file OUTF. */
223 dump_conflict_function (FILE *outf, conflict_function *cf)
227 if (cf->n == NO_DEPENDENCE)
228 fprintf (outf, "no dependence\n");
229 else if (cf->n == NOT_KNOWN)
230 fprintf (outf, "not known\n");
233 for (i = 0; i < cf->n; i++)
236 dump_affine_function (outf, cf->fns[i]);
237 fprintf (outf, "]\n");
242 /* Dump function for a SUBSCRIPT structure. */
245 dump_subscript (FILE *outf, struct subscript *subscript)
247 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
249 fprintf (outf, "\n (subscript \n");
250 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
251 dump_conflict_function (outf, cf);
252 if (CF_NONTRIVIAL_P (cf))
254 tree last_iteration = SUB_LAST_CONFLICT (subscript);
255 fprintf (outf, " last_conflict: ");
256 print_generic_stmt (outf, last_iteration, 0);
259 cf = SUB_CONFLICTS_IN_B (subscript);
260 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
261 dump_conflict_function (outf, cf);
262 if (CF_NONTRIVIAL_P (cf))
264 tree last_iteration = SUB_LAST_CONFLICT (subscript);
265 fprintf (outf, " last_conflict: ");
266 print_generic_stmt (outf, last_iteration, 0);
269 fprintf (outf, " (Subscript distance: ");
270 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
271 fprintf (outf, " )\n");
272 fprintf (outf, " )\n");
275 /* Print the classic direction vector DIRV to OUTF. */
278 print_direction_vector (FILE *outf,
284 for (eq = 0; eq < length; eq++)
286 enum data_dependence_direction dir = dirv[eq];
291 fprintf (outf, " +");
294 fprintf (outf, " -");
297 fprintf (outf, " =");
299 case dir_positive_or_equal:
300 fprintf (outf, " +=");
302 case dir_positive_or_negative:
303 fprintf (outf, " +-");
305 case dir_negative_or_equal:
306 fprintf (outf, " -=");
309 fprintf (outf, " *");
312 fprintf (outf, "indep");
316 fprintf (outf, "\n");
319 /* Print a vector of direction vectors. */
322 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
328 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
329 print_direction_vector (outf, v, length);
332 /* Print a vector of distance vectors. */
335 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
341 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
342 print_lambda_vector (outf, v, length);
348 debug_data_dependence_relation (struct data_dependence_relation *ddr)
350 dump_data_dependence_relation (stderr, ddr);
353 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
356 dump_data_dependence_relation (FILE *outf,
357 struct data_dependence_relation *ddr)
359 struct data_reference *dra, *drb;
363 fprintf (outf, "(Data Dep: \n");
365 dump_data_reference (outf, dra);
366 dump_data_reference (outf, drb);
368 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
369 fprintf (outf, " (don't know)\n");
371 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
372 fprintf (outf, " (no dependence)\n");
374 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
379 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
381 fprintf (outf, " access_fn_A: ");
382 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
383 fprintf (outf, " access_fn_B: ");
384 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
385 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
388 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
389 fprintf (outf, " loop nest: (");
390 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
391 fprintf (outf, "%d ", loopi->num);
392 fprintf (outf, ")\n");
394 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
396 fprintf (outf, " distance_vector: ");
397 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
401 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
403 fprintf (outf, " direction_vector: ");
404 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
409 fprintf (outf, ")\n");
412 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
415 dump_data_dependence_direction (FILE *file,
416 enum data_dependence_direction dir)
432 case dir_positive_or_negative:
433 fprintf (file, "+-");
436 case dir_positive_or_equal:
437 fprintf (file, "+=");
440 case dir_negative_or_equal:
441 fprintf (file, "-=");
453 /* Dumps the distance and direction vectors in FILE. DDRS contains
454 the dependence relations, and VECT_SIZE is the size of the
455 dependence vectors, or in other words the number of loops in the
459 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
462 struct data_dependence_relation *ddr;
465 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
466 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
468 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
470 fprintf (file, "DISTANCE_V (");
471 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
472 fprintf (file, ")\n");
475 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
477 fprintf (file, "DIRECTION_V (");
478 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
479 fprintf (file, ")\n");
483 fprintf (file, "\n\n");
486 /* Dumps the data dependence relations DDRS in FILE. */
489 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
492 struct data_dependence_relation *ddr;
494 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
495 dump_data_dependence_relation (file, ddr);
497 fprintf (file, "\n\n");
500 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
501 will be ssizetype. */
504 split_constant_offset (tree exp, tree *var, tree *off)
506 tree type = TREE_TYPE (exp), otype;
513 otype = TREE_TYPE (exp);
514 code = TREE_CODE (exp);
519 *var = build_int_cst (type, 0);
520 *off = fold_convert (ssizetype, exp);
523 case POINTER_PLUS_EXPR:
528 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
529 split_constant_offset (TREE_OPERAND (exp, 1), &var1, &off1);
530 *var = fold_convert (type, fold_build2 (TREE_CODE (exp), otype,
532 *off = size_binop (code, off0, off1);
536 off1 = TREE_OPERAND (exp, 1);
537 if (TREE_CODE (off1) != INTEGER_CST)
540 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
541 *var = fold_convert (type, fold_build2 (MULT_EXPR, otype,
543 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, off1));
548 tree op, base, poffset;
549 HOST_WIDE_INT pbitsize, pbitpos;
550 enum machine_mode pmode;
551 int punsignedp, pvolatilep;
553 op = TREE_OPERAND (exp, 0);
554 if (!handled_component_p (op))
557 base = get_inner_reference (op, &pbitsize, &pbitpos, &poffset,
558 &pmode, &punsignedp, &pvolatilep, false);
560 if (pbitpos % BITS_PER_UNIT != 0)
562 base = build_fold_addr_expr (base);
563 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
567 split_constant_offset (poffset, &poffset, &off1);
568 off0 = size_binop (PLUS_EXPR, off0, off1);
569 if (POINTER_TYPE_P (TREE_TYPE (base)))
570 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
571 base, fold_convert (sizetype, poffset));
573 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
574 fold_convert (TREE_TYPE (base), poffset));
577 var0 = fold_convert (type, base);
579 /* If variable length types are involved, punt, otherwise casts
580 might be converted into ARRAY_REFs in gimplify_conversion.
581 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
582 possibly no longer appears in current GIMPLE, might resurface.
583 This perhaps could run
584 if (TREE_CODE (var0) == NOP_EXPR
585 || TREE_CODE (var0) == CONVERT_EXPR)
587 gimplify_conversion (&var0);
588 // Attempt to fill in any within var0 found ARRAY_REF's
589 // element size from corresponding op embedded ARRAY_REF,
590 // if unsuccessful, just punt.
592 while (POINTER_TYPE_P (type))
593 type = TREE_TYPE (type);
594 if (int_size_in_bytes (type) < 0)
604 tree def_stmt = SSA_NAME_DEF_STMT (exp);
605 if (TREE_CODE (def_stmt) == GIMPLE_MODIFY_STMT)
607 tree def_stmt_rhs = GIMPLE_STMT_OPERAND (def_stmt, 1);
609 if (!TREE_SIDE_EFFECTS (def_stmt_rhs)
610 && EXPR_P (def_stmt_rhs)
611 && !REFERENCE_CLASS_P (def_stmt_rhs)
612 && !get_call_expr_in (def_stmt_rhs))
614 split_constant_offset (def_stmt_rhs, &var0, &off0);
615 var0 = fold_convert (type, var0);
628 *off = ssize_int (0);
631 /* Returns the address ADDR of an object in a canonical shape (without nop
632 casts, and with type of pointer to the object). */
635 canonicalize_base_object_address (tree addr)
641 /* The base address may be obtained by casting from integer, in that case
643 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
646 if (TREE_CODE (addr) != ADDR_EXPR)
649 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
652 /* Analyzes the behavior of the memory reference DR in the innermost loop that
656 dr_analyze_innermost (struct data_reference *dr)
658 tree stmt = DR_STMT (dr);
659 struct loop *loop = loop_containing_stmt (stmt);
660 tree ref = DR_REF (dr);
661 HOST_WIDE_INT pbitsize, pbitpos;
663 enum machine_mode pmode;
664 int punsignedp, pvolatilep;
665 affine_iv base_iv, offset_iv;
666 tree init, dinit, step;
668 if (dump_file && (dump_flags & TDF_DETAILS))
669 fprintf (dump_file, "analyze_innermost: ");
671 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
672 &pmode, &punsignedp, &pvolatilep, false);
673 gcc_assert (base != NULL_TREE);
675 if (pbitpos % BITS_PER_UNIT != 0)
677 if (dump_file && (dump_flags & TDF_DETAILS))
678 fprintf (dump_file, "failed: bit offset alignment.\n");
682 base = build_fold_addr_expr (base);
683 if (!simple_iv (loop, stmt, base, &base_iv, false))
685 if (dump_file && (dump_flags & TDF_DETAILS))
686 fprintf (dump_file, "failed: evolution of base is not affine.\n");
691 offset_iv.base = ssize_int (0);
692 offset_iv.step = ssize_int (0);
694 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
696 if (dump_file && (dump_flags & TDF_DETAILS))
697 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
701 init = ssize_int (pbitpos / BITS_PER_UNIT);
702 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
703 init = size_binop (PLUS_EXPR, init, dinit);
704 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
705 init = size_binop (PLUS_EXPR, init, dinit);
707 step = size_binop (PLUS_EXPR,
708 fold_convert (ssizetype, base_iv.step),
709 fold_convert (ssizetype, offset_iv.step));
711 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
713 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
717 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
719 if (dump_file && (dump_flags & TDF_DETAILS))
720 fprintf (dump_file, "success.\n");
723 /* Determines the base object and the list of indices of memory reference
724 DR, analyzed in loop nest NEST. */
727 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
729 tree stmt = DR_STMT (dr);
730 struct loop *loop = loop_containing_stmt (stmt);
731 VEC (tree, heap) *access_fns = NULL;
732 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
733 tree base, off, access_fn;
735 while (handled_component_p (aref))
737 if (TREE_CODE (aref) == ARRAY_REF)
739 op = TREE_OPERAND (aref, 1);
740 access_fn = analyze_scalar_evolution (loop, op);
741 access_fn = resolve_mixers (nest, access_fn);
742 VEC_safe_push (tree, heap, access_fns, access_fn);
744 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
747 aref = TREE_OPERAND (aref, 0);
750 if (INDIRECT_REF_P (aref))
752 op = TREE_OPERAND (aref, 0);
753 access_fn = analyze_scalar_evolution (loop, op);
754 access_fn = resolve_mixers (nest, access_fn);
755 base = initial_condition (access_fn);
756 split_constant_offset (base, &base, &off);
757 access_fn = chrec_replace_initial_condition (access_fn,
758 fold_convert (TREE_TYPE (base), off));
760 TREE_OPERAND (aref, 0) = base;
761 VEC_safe_push (tree, heap, access_fns, access_fn);
764 DR_BASE_OBJECT (dr) = ref;
765 DR_ACCESS_FNS (dr) = access_fns;
768 /* Extracts the alias analysis information from the memory reference DR. */
771 dr_analyze_alias (struct data_reference *dr)
773 tree stmt = DR_STMT (dr);
774 tree ref = DR_REF (dr);
775 tree base = get_base_address (ref), addr, smt = NULL_TREE;
782 else if (INDIRECT_REF_P (base))
784 addr = TREE_OPERAND (base, 0);
785 if (TREE_CODE (addr) == SSA_NAME)
787 smt = symbol_mem_tag (SSA_NAME_VAR (addr));
788 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
792 DR_SYMBOL_TAG (dr) = smt;
793 if (smt && var_can_have_subvars (smt))
794 DR_SUBVARS (dr) = get_subvars_for_var (smt);
796 vops = BITMAP_ALLOC (NULL);
797 FOR_EACH_SSA_TREE_OPERAND (op, stmt, it, SSA_OP_VIRTUAL_USES)
799 bitmap_set_bit (vops, DECL_UID (SSA_NAME_VAR (op)));
805 /* Returns true if the address of DR is invariant. */
808 dr_address_invariant_p (struct data_reference *dr)
813 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
814 if (tree_contains_chrecs (idx, NULL))
820 /* Frees data reference DR. */
823 free_data_ref (data_reference_p dr)
825 BITMAP_FREE (DR_VOPS (dr));
826 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
830 /* Analyzes memory reference MEMREF accessed in STMT. The reference
831 is read if IS_READ is true, write otherwise. Returns the
832 data_reference description of MEMREF. NEST is the outermost loop of the
833 loop nest in that the reference should be analyzed. */
835 struct data_reference *
836 create_data_ref (struct loop *nest, tree memref, tree stmt, bool is_read)
838 struct data_reference *dr;
840 if (dump_file && (dump_flags & TDF_DETAILS))
842 fprintf (dump_file, "Creating dr for ");
843 print_generic_expr (dump_file, memref, TDF_SLIM);
844 fprintf (dump_file, "\n");
847 dr = XCNEW (struct data_reference);
849 DR_REF (dr) = memref;
850 DR_IS_READ (dr) = is_read;
852 dr_analyze_innermost (dr);
853 dr_analyze_indices (dr, nest);
854 dr_analyze_alias (dr);
856 if (dump_file && (dump_flags & TDF_DETAILS))
858 fprintf (dump_file, "\tbase_address: ");
859 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
860 fprintf (dump_file, "\n\toffset from base address: ");
861 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
862 fprintf (dump_file, "\n\tconstant offset from base address: ");
863 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
864 fprintf (dump_file, "\n\tstep: ");
865 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
866 fprintf (dump_file, "\n\taligned to: ");
867 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
868 fprintf (dump_file, "\n\tbase_object: ");
869 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
870 fprintf (dump_file, "\n\tsymbol tag: ");
871 print_generic_expr (dump_file, DR_SYMBOL_TAG (dr), TDF_SLIM);
872 fprintf (dump_file, "\n");
878 /* Returns true if FNA == FNB. */
881 affine_function_equal_p (affine_fn fna, affine_fn fnb)
883 unsigned i, n = VEC_length (tree, fna);
885 if (n != VEC_length (tree, fnb))
888 for (i = 0; i < n; i++)
889 if (!operand_equal_p (VEC_index (tree, fna, i),
890 VEC_index (tree, fnb, i), 0))
896 /* If all the functions in CF are the same, returns one of them,
897 otherwise returns NULL. */
900 common_affine_function (conflict_function *cf)
905 if (!CF_NONTRIVIAL_P (cf))
910 for (i = 1; i < cf->n; i++)
911 if (!affine_function_equal_p (comm, cf->fns[i]))
917 /* Returns the base of the affine function FN. */
920 affine_function_base (affine_fn fn)
922 return VEC_index (tree, fn, 0);
925 /* Returns true if FN is a constant. */
928 affine_function_constant_p (affine_fn fn)
933 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
934 if (!integer_zerop (coef))
940 /* Returns true if FN is the zero constant function. */
943 affine_function_zero_p (affine_fn fn)
945 return (integer_zerop (affine_function_base (fn))
946 && affine_function_constant_p (fn));
949 /* Returns a signed integer type with the largest precision from TA
953 signed_type_for_types (tree ta, tree tb)
955 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
956 return signed_type_for (ta);
958 return signed_type_for (tb);
961 /* Applies operation OP on affine functions FNA and FNB, and returns the
965 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
971 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
973 n = VEC_length (tree, fna);
974 m = VEC_length (tree, fnb);
978 n = VEC_length (tree, fnb);
979 m = VEC_length (tree, fna);
982 ret = VEC_alloc (tree, heap, m);
983 for (i = 0; i < n; i++)
985 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
986 TREE_TYPE (VEC_index (tree, fnb, i)));
988 VEC_quick_push (tree, ret,
989 fold_build2 (op, type,
990 VEC_index (tree, fna, i),
991 VEC_index (tree, fnb, i)));
994 for (; VEC_iterate (tree, fna, i, coef); i++)
995 VEC_quick_push (tree, ret,
996 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
997 coef, integer_zero_node));
998 for (; VEC_iterate (tree, fnb, i, coef); i++)
999 VEC_quick_push (tree, ret,
1000 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1001 integer_zero_node, coef));
1006 /* Returns the sum of affine functions FNA and FNB. */
1009 affine_fn_plus (affine_fn fna, affine_fn fnb)
1011 return affine_fn_op (PLUS_EXPR, fna, fnb);
1014 /* Returns the difference of affine functions FNA and FNB. */
1017 affine_fn_minus (affine_fn fna, affine_fn fnb)
1019 return affine_fn_op (MINUS_EXPR, fna, fnb);
1022 /* Frees affine function FN. */
1025 affine_fn_free (affine_fn fn)
1027 VEC_free (tree, heap, fn);
1030 /* Determine for each subscript in the data dependence relation DDR
1034 compute_subscript_distance (struct data_dependence_relation *ddr)
1036 conflict_function *cf_a, *cf_b;
1037 affine_fn fn_a, fn_b, diff;
1039 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1043 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1045 struct subscript *subscript;
1047 subscript = DDR_SUBSCRIPT (ddr, i);
1048 cf_a = SUB_CONFLICTS_IN_A (subscript);
1049 cf_b = SUB_CONFLICTS_IN_B (subscript);
1051 fn_a = common_affine_function (cf_a);
1052 fn_b = common_affine_function (cf_b);
1055 SUB_DISTANCE (subscript) = chrec_dont_know;
1058 diff = affine_fn_minus (fn_a, fn_b);
1060 if (affine_function_constant_p (diff))
1061 SUB_DISTANCE (subscript) = affine_function_base (diff);
1063 SUB_DISTANCE (subscript) = chrec_dont_know;
1065 affine_fn_free (diff);
1070 /* Returns the conflict function for "unknown". */
1072 static conflict_function *
1073 conflict_fn_not_known (void)
1075 conflict_function *fn = XCNEW (conflict_function);
1081 /* Returns the conflict function for "independent". */
1083 static conflict_function *
1084 conflict_fn_no_dependence (void)
1086 conflict_function *fn = XCNEW (conflict_function);
1087 fn->n = NO_DEPENDENCE;
1092 /* Returns true if the address of OBJ is invariant in LOOP. */
1095 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1097 while (handled_component_p (obj))
1099 if (TREE_CODE (obj) == ARRAY_REF)
1101 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1102 need to check the stride and the lower bound of the reference. */
1103 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1105 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1109 else if (TREE_CODE (obj) == COMPONENT_REF)
1111 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1115 obj = TREE_OPERAND (obj, 0);
1118 if (!INDIRECT_REF_P (obj))
1121 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1125 /* Returns true if A and B are accesses to different objects, or to different
1126 fields of the same object. */
1129 disjoint_objects_p (tree a, tree b)
1131 tree base_a, base_b;
1132 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1135 base_a = get_base_address (a);
1136 base_b = get_base_address (b);
1140 && base_a != base_b)
1143 if (!operand_equal_p (base_a, base_b, 0))
1146 /* Compare the component references of A and B. We must start from the inner
1147 ones, so record them to the vector first. */
1148 while (handled_component_p (a))
1150 VEC_safe_push (tree, heap, comp_a, a);
1151 a = TREE_OPERAND (a, 0);
1153 while (handled_component_p (b))
1155 VEC_safe_push (tree, heap, comp_b, b);
1156 b = TREE_OPERAND (b, 0);
1162 if (VEC_length (tree, comp_a) == 0
1163 || VEC_length (tree, comp_b) == 0)
1166 a = VEC_pop (tree, comp_a);
1167 b = VEC_pop (tree, comp_b);
1169 /* Real and imaginary part of a variable do not alias. */
1170 if ((TREE_CODE (a) == REALPART_EXPR
1171 && TREE_CODE (b) == IMAGPART_EXPR)
1172 || (TREE_CODE (a) == IMAGPART_EXPR
1173 && TREE_CODE (b) == REALPART_EXPR))
1179 if (TREE_CODE (a) != TREE_CODE (b))
1182 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1183 DR_BASE_OBJECT are always zero. */
1184 if (TREE_CODE (a) == ARRAY_REF)
1186 else if (TREE_CODE (a) == COMPONENT_REF)
1188 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1191 /* Different fields of unions may overlap. */
1192 base_a = TREE_OPERAND (a, 0);
1193 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1196 /* Different fields of structures cannot. */
1204 VEC_free (tree, heap, comp_a);
1205 VEC_free (tree, heap, comp_b);
1210 /* Returns false if we can prove that data references A and B do not alias,
1214 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1216 const_tree addr_a = DR_BASE_ADDRESS (a);
1217 const_tree addr_b = DR_BASE_ADDRESS (b);
1218 const_tree type_a, type_b;
1219 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1221 /* If the sets of virtual operands are disjoint, the memory references do not
1223 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
1226 /* If the accessed objects are disjoint, the memory references do not
1228 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1231 if (!addr_a || !addr_b)
1234 /* If the references are based on different static objects, they cannot alias
1235 (PTA should be able to disambiguate such accesses, but often it fails to,
1236 since currently we cannot distinguish between pointer and offset in pointer
1238 if (TREE_CODE (addr_a) == ADDR_EXPR
1239 && TREE_CODE (addr_b) == ADDR_EXPR)
1240 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1242 /* An instruction writing through a restricted pointer is "independent" of any
1243 instruction reading or writing through a different restricted pointer,
1244 in the same block/scope. */
1246 type_a = TREE_TYPE (addr_a);
1247 type_b = TREE_TYPE (addr_b);
1248 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1250 if (TREE_CODE (addr_a) == SSA_NAME)
1251 decl_a = SSA_NAME_VAR (addr_a);
1252 if (TREE_CODE (addr_b) == SSA_NAME)
1253 decl_b = SSA_NAME_VAR (addr_b);
1255 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1256 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1257 && decl_a && DECL_P (decl_a)
1258 && decl_b && DECL_P (decl_b)
1260 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1261 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1267 /* Initialize a data dependence relation between data accesses A and
1268 B. NB_LOOPS is the number of loops surrounding the references: the
1269 size of the classic distance/direction vectors. */
1271 static struct data_dependence_relation *
1272 initialize_data_dependence_relation (struct data_reference *a,
1273 struct data_reference *b,
1274 VEC (loop_p, heap) *loop_nest)
1276 struct data_dependence_relation *res;
1279 res = XNEW (struct data_dependence_relation);
1282 DDR_LOOP_NEST (res) = NULL;
1283 DDR_REVERSED_P (res) = false;
1284 DDR_SUBSCRIPTS (res) = NULL;
1285 DDR_DIR_VECTS (res) = NULL;
1286 DDR_DIST_VECTS (res) = NULL;
1288 if (a == NULL || b == NULL)
1290 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1294 /* If the data references do not alias, then they are independent. */
1295 if (!dr_may_alias_p (a, b))
1297 DDR_ARE_DEPENDENT (res) = chrec_known;
1301 /* If the references do not access the same object, we do not know
1302 whether they alias or not. */
1303 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1305 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1309 /* If the base of the object is not invariant in the loop nest, we cannot
1310 analyze it. TODO -- in fact, it would suffice to record that there may
1311 be arbitrary dependences in the loops where the base object varies. */
1312 if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1313 DR_BASE_OBJECT (a)))
1315 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1319 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1321 DDR_AFFINE_P (res) = true;
1322 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1323 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1324 DDR_LOOP_NEST (res) = loop_nest;
1325 DDR_INNER_LOOP (res) = 0;
1327 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1329 struct subscript *subscript;
1331 subscript = XNEW (struct subscript);
1332 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1333 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1334 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1335 SUB_DISTANCE (subscript) = chrec_dont_know;
1336 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1342 /* Frees memory used by the conflict function F. */
1345 free_conflict_function (conflict_function *f)
1349 if (CF_NONTRIVIAL_P (f))
1351 for (i = 0; i < f->n; i++)
1352 affine_fn_free (f->fns[i]);
1357 /* Frees memory used by SUBSCRIPTS. */
1360 free_subscripts (VEC (subscript_p, heap) *subscripts)
1365 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1367 free_conflict_function (s->conflicting_iterations_in_a);
1368 free_conflict_function (s->conflicting_iterations_in_b);
1370 VEC_free (subscript_p, heap, subscripts);
1373 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1377 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1380 if (dump_file && (dump_flags & TDF_DETAILS))
1382 fprintf (dump_file, "(dependence classified: ");
1383 print_generic_expr (dump_file, chrec, 0);
1384 fprintf (dump_file, ")\n");
1387 DDR_ARE_DEPENDENT (ddr) = chrec;
1388 free_subscripts (DDR_SUBSCRIPTS (ddr));
1389 DDR_SUBSCRIPTS (ddr) = NULL;
1392 /* The dependence relation DDR cannot be represented by a distance
1396 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1398 if (dump_file && (dump_flags & TDF_DETAILS))
1399 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1401 DDR_AFFINE_P (ddr) = false;
1406 /* This section contains the classic Banerjee tests. */
1408 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1409 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1412 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1414 return (evolution_function_is_constant_p (chrec_a)
1415 && evolution_function_is_constant_p (chrec_b));
1418 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1419 variable, i.e., if the SIV (Single Index Variable) test is true. */
1422 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1424 if ((evolution_function_is_constant_p (chrec_a)
1425 && evolution_function_is_univariate_p (chrec_b))
1426 || (evolution_function_is_constant_p (chrec_b)
1427 && evolution_function_is_univariate_p (chrec_a)))
1430 if (evolution_function_is_univariate_p (chrec_a)
1431 && evolution_function_is_univariate_p (chrec_b))
1433 switch (TREE_CODE (chrec_a))
1435 case POLYNOMIAL_CHREC:
1436 switch (TREE_CODE (chrec_b))
1438 case POLYNOMIAL_CHREC:
1439 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1454 /* Creates a conflict function with N dimensions. The affine functions
1455 in each dimension follow. */
1457 static conflict_function *
1458 conflict_fn (unsigned n, ...)
1461 conflict_function *ret = XCNEW (conflict_function);
1464 gcc_assert (0 < n && n <= MAX_DIM);
1468 for (i = 0; i < n; i++)
1469 ret->fns[i] = va_arg (ap, affine_fn);
1475 /* Returns constant affine function with value CST. */
1478 affine_fn_cst (tree cst)
1480 affine_fn fn = VEC_alloc (tree, heap, 1);
1481 VEC_quick_push (tree, fn, cst);
1485 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1488 affine_fn_univar (tree cst, unsigned dim, tree coef)
1490 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1493 gcc_assert (dim > 0);
1494 VEC_quick_push (tree, fn, cst);
1495 for (i = 1; i < dim; i++)
1496 VEC_quick_push (tree, fn, integer_zero_node);
1497 VEC_quick_push (tree, fn, coef);
1501 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1502 *OVERLAPS_B are initialized to the functions that describe the
1503 relation between the elements accessed twice by CHREC_A and
1504 CHREC_B. For k >= 0, the following property is verified:
1506 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1509 analyze_ziv_subscript (tree chrec_a,
1511 conflict_function **overlaps_a,
1512 conflict_function **overlaps_b,
1513 tree *last_conflicts)
1515 tree type, difference;
1516 dependence_stats.num_ziv++;
1518 if (dump_file && (dump_flags & TDF_DETAILS))
1519 fprintf (dump_file, "(analyze_ziv_subscript \n");
1521 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1522 chrec_a = chrec_convert (type, chrec_a, NULL_TREE);
1523 chrec_b = chrec_convert (type, chrec_b, NULL_TREE);
1524 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1526 switch (TREE_CODE (difference))
1529 if (integer_zerop (difference))
1531 /* The difference is equal to zero: the accessed index
1532 overlaps for each iteration in the loop. */
1533 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1534 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1535 *last_conflicts = chrec_dont_know;
1536 dependence_stats.num_ziv_dependent++;
1540 /* The accesses do not overlap. */
1541 *overlaps_a = conflict_fn_no_dependence ();
1542 *overlaps_b = conflict_fn_no_dependence ();
1543 *last_conflicts = integer_zero_node;
1544 dependence_stats.num_ziv_independent++;
1549 /* We're not sure whether the indexes overlap. For the moment,
1550 conservatively answer "don't know". */
1551 if (dump_file && (dump_flags & TDF_DETAILS))
1552 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1554 *overlaps_a = conflict_fn_not_known ();
1555 *overlaps_b = conflict_fn_not_known ();
1556 *last_conflicts = chrec_dont_know;
1557 dependence_stats.num_ziv_unimplemented++;
1561 if (dump_file && (dump_flags & TDF_DETAILS))
1562 fprintf (dump_file, ")\n");
1565 /* Sets NIT to the estimated number of executions of the statements in
1566 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1567 large as the number of iterations. If we have no reliable estimate,
1568 the function returns false, otherwise returns true. */
1571 estimated_loop_iterations (struct loop *loop, bool conservative,
1574 estimate_numbers_of_iterations_loop (loop);
1577 if (!loop->any_upper_bound)
1580 *nit = loop->nb_iterations_upper_bound;
1584 if (!loop->any_estimate)
1587 *nit = loop->nb_iterations_estimate;
1593 /* Similar to estimated_loop_iterations, but returns the estimate only
1594 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1595 on the number of iterations of LOOP could not be derived, returns -1. */
1598 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1601 HOST_WIDE_INT hwi_nit;
1603 if (!estimated_loop_iterations (loop, conservative, &nit))
1606 if (!double_int_fits_in_shwi_p (nit))
1608 hwi_nit = double_int_to_shwi (nit);
1610 return hwi_nit < 0 ? -1 : hwi_nit;
1613 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1614 and only if it fits to the int type. If this is not the case, or the
1615 estimate on the number of iterations of LOOP could not be derived, returns
1619 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1624 if (!estimated_loop_iterations (loop, conservative, &nit))
1625 return chrec_dont_know;
1627 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1628 if (!double_int_fits_to_tree_p (type, nit))
1629 return chrec_dont_know;
1631 return double_int_to_tree (type, nit);
1634 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1635 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1636 *OVERLAPS_B are initialized to the functions that describe the
1637 relation between the elements accessed twice by CHREC_A and
1638 CHREC_B. For k >= 0, the following property is verified:
1640 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1643 analyze_siv_subscript_cst_affine (tree chrec_a,
1645 conflict_function **overlaps_a,
1646 conflict_function **overlaps_b,
1647 tree *last_conflicts)
1649 bool value0, value1, value2;
1650 tree type, difference, tmp;
1652 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1653 chrec_a = chrec_convert (type, chrec_a, NULL_TREE);
1654 chrec_b = chrec_convert (type, chrec_b, NULL_TREE);
1655 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1657 if (!chrec_is_positive (initial_condition (difference), &value0))
1659 if (dump_file && (dump_flags & TDF_DETAILS))
1660 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1662 dependence_stats.num_siv_unimplemented++;
1663 *overlaps_a = conflict_fn_not_known ();
1664 *overlaps_b = conflict_fn_not_known ();
1665 *last_conflicts = chrec_dont_know;
1670 if (value0 == false)
1672 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1674 if (dump_file && (dump_flags & TDF_DETAILS))
1675 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1677 *overlaps_a = conflict_fn_not_known ();
1678 *overlaps_b = conflict_fn_not_known ();
1679 *last_conflicts = chrec_dont_know;
1680 dependence_stats.num_siv_unimplemented++;
1689 chrec_b = {10, +, 1}
1692 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1694 HOST_WIDE_INT numiter;
1695 struct loop *loop = get_chrec_loop (chrec_b);
1697 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1698 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1699 fold_build1 (ABS_EXPR, type, difference),
1700 CHREC_RIGHT (chrec_b));
1701 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1702 *last_conflicts = integer_one_node;
1705 /* Perform weak-zero siv test to see if overlap is
1706 outside the loop bounds. */
1707 numiter = estimated_loop_iterations_int (loop, false);
1710 && compare_tree_int (tmp, numiter) > 0)
1712 free_conflict_function (*overlaps_a);
1713 free_conflict_function (*overlaps_b);
1714 *overlaps_a = conflict_fn_no_dependence ();
1715 *overlaps_b = conflict_fn_no_dependence ();
1716 *last_conflicts = integer_zero_node;
1717 dependence_stats.num_siv_independent++;
1720 dependence_stats.num_siv_dependent++;
1724 /* When the step does not divide the difference, there are
1728 *overlaps_a = conflict_fn_no_dependence ();
1729 *overlaps_b = conflict_fn_no_dependence ();
1730 *last_conflicts = integer_zero_node;
1731 dependence_stats.num_siv_independent++;
1740 chrec_b = {10, +, -1}
1742 In this case, chrec_a will not overlap with chrec_b. */
1743 *overlaps_a = conflict_fn_no_dependence ();
1744 *overlaps_b = conflict_fn_no_dependence ();
1745 *last_conflicts = integer_zero_node;
1746 dependence_stats.num_siv_independent++;
1753 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1755 if (dump_file && (dump_flags & TDF_DETAILS))
1756 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1758 *overlaps_a = conflict_fn_not_known ();
1759 *overlaps_b = conflict_fn_not_known ();
1760 *last_conflicts = chrec_dont_know;
1761 dependence_stats.num_siv_unimplemented++;
1766 if (value2 == false)
1770 chrec_b = {10, +, -1}
1772 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1774 HOST_WIDE_INT numiter;
1775 struct loop *loop = get_chrec_loop (chrec_b);
1777 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1778 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1779 CHREC_RIGHT (chrec_b));
1780 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1781 *last_conflicts = integer_one_node;
1783 /* Perform weak-zero siv test to see if overlap is
1784 outside the loop bounds. */
1785 numiter = estimated_loop_iterations_int (loop, false);
1788 && compare_tree_int (tmp, numiter) > 0)
1790 free_conflict_function (*overlaps_a);
1791 free_conflict_function (*overlaps_b);
1792 *overlaps_a = conflict_fn_no_dependence ();
1793 *overlaps_b = conflict_fn_no_dependence ();
1794 *last_conflicts = integer_zero_node;
1795 dependence_stats.num_siv_independent++;
1798 dependence_stats.num_siv_dependent++;
1802 /* When the step does not divide the difference, there
1806 *overlaps_a = conflict_fn_no_dependence ();
1807 *overlaps_b = conflict_fn_no_dependence ();
1808 *last_conflicts = integer_zero_node;
1809 dependence_stats.num_siv_independent++;
1819 In this case, chrec_a will not overlap with chrec_b. */
1820 *overlaps_a = conflict_fn_no_dependence ();
1821 *overlaps_b = conflict_fn_no_dependence ();
1822 *last_conflicts = integer_zero_node;
1823 dependence_stats.num_siv_independent++;
1831 /* Helper recursive function for initializing the matrix A. Returns
1832 the initial value of CHREC. */
1834 static HOST_WIDE_INT
1835 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1839 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
1840 return int_cst_value (chrec);
1842 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1843 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1846 #define FLOOR_DIV(x,y) ((x) / (y))
1848 /* Solves the special case of the Diophantine equation:
1849 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1851 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1852 number of iterations that loops X and Y run. The overlaps will be
1853 constructed as evolutions in dimension DIM. */
1856 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1857 affine_fn *overlaps_a,
1858 affine_fn *overlaps_b,
1859 tree *last_conflicts, int dim)
1861 if (((step_a > 0 && step_b > 0)
1862 || (step_a < 0 && step_b < 0)))
1864 int step_overlaps_a, step_overlaps_b;
1865 int gcd_steps_a_b, last_conflict, tau2;
1867 gcd_steps_a_b = gcd (step_a, step_b);
1868 step_overlaps_a = step_b / gcd_steps_a_b;
1869 step_overlaps_b = step_a / gcd_steps_a_b;
1873 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1874 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1875 last_conflict = tau2;
1876 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1879 *last_conflicts = chrec_dont_know;
1881 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1882 build_int_cst (NULL_TREE,
1884 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1885 build_int_cst (NULL_TREE,
1891 *overlaps_a = affine_fn_cst (integer_zero_node);
1892 *overlaps_b = affine_fn_cst (integer_zero_node);
1893 *last_conflicts = integer_zero_node;
1897 /* Solves the special case of a Diophantine equation where CHREC_A is
1898 an affine bivariate function, and CHREC_B is an affine univariate
1899 function. For example,
1901 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1903 has the following overlapping functions:
1905 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1906 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1907 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1909 FORNOW: This is a specialized implementation for a case occurring in
1910 a common benchmark. Implement the general algorithm. */
1913 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1914 conflict_function **overlaps_a,
1915 conflict_function **overlaps_b,
1916 tree *last_conflicts)
1918 bool xz_p, yz_p, xyz_p;
1919 int step_x, step_y, step_z;
1920 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1921 affine_fn overlaps_a_xz, overlaps_b_xz;
1922 affine_fn overlaps_a_yz, overlaps_b_yz;
1923 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1924 affine_fn ova1, ova2, ovb;
1925 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1927 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1928 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1929 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1932 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1934 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1935 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1937 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1939 if (dump_file && (dump_flags & TDF_DETAILS))
1940 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1942 *overlaps_a = conflict_fn_not_known ();
1943 *overlaps_b = conflict_fn_not_known ();
1944 *last_conflicts = chrec_dont_know;
1948 niter = MIN (niter_x, niter_z);
1949 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1952 &last_conflicts_xz, 1);
1953 niter = MIN (niter_y, niter_z);
1954 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1957 &last_conflicts_yz, 2);
1958 niter = MIN (niter_x, niter_z);
1959 niter = MIN (niter_y, niter);
1960 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1963 &last_conflicts_xyz, 3);
1965 xz_p = !integer_zerop (last_conflicts_xz);
1966 yz_p = !integer_zerop (last_conflicts_yz);
1967 xyz_p = !integer_zerop (last_conflicts_xyz);
1969 if (xz_p || yz_p || xyz_p)
1971 ova1 = affine_fn_cst (integer_zero_node);
1972 ova2 = affine_fn_cst (integer_zero_node);
1973 ovb = affine_fn_cst (integer_zero_node);
1976 affine_fn t0 = ova1;
1979 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
1980 ovb = affine_fn_plus (ovb, overlaps_b_xz);
1981 affine_fn_free (t0);
1982 affine_fn_free (t2);
1983 *last_conflicts = last_conflicts_xz;
1987 affine_fn t0 = ova2;
1990 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
1991 ovb = affine_fn_plus (ovb, overlaps_b_yz);
1992 affine_fn_free (t0);
1993 affine_fn_free (t2);
1994 *last_conflicts = last_conflicts_yz;
1998 affine_fn t0 = ova1;
1999 affine_fn t2 = ova2;
2002 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2003 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2004 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2005 affine_fn_free (t0);
2006 affine_fn_free (t2);
2007 affine_fn_free (t4);
2008 *last_conflicts = last_conflicts_xyz;
2010 *overlaps_a = conflict_fn (2, ova1, ova2);
2011 *overlaps_b = conflict_fn (1, ovb);
2015 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2016 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2017 *last_conflicts = integer_zero_node;
2020 affine_fn_free (overlaps_a_xz);
2021 affine_fn_free (overlaps_b_xz);
2022 affine_fn_free (overlaps_a_yz);
2023 affine_fn_free (overlaps_b_yz);
2024 affine_fn_free (overlaps_a_xyz);
2025 affine_fn_free (overlaps_b_xyz);
2028 /* Determines the overlapping elements due to accesses CHREC_A and
2029 CHREC_B, that are affine functions. This function cannot handle
2030 symbolic evolution functions, ie. when initial conditions are
2031 parameters, because it uses lambda matrices of integers. */
2034 analyze_subscript_affine_affine (tree chrec_a,
2036 conflict_function **overlaps_a,
2037 conflict_function **overlaps_b,
2038 tree *last_conflicts)
2040 unsigned nb_vars_a, nb_vars_b, dim;
2041 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2042 lambda_matrix A, U, S;
2044 if (eq_evolutions_p (chrec_a, chrec_b))
2046 /* The accessed index overlaps for each iteration in the
2048 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2049 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2050 *last_conflicts = chrec_dont_know;
2053 if (dump_file && (dump_flags & TDF_DETAILS))
2054 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2056 /* For determining the initial intersection, we have to solve a
2057 Diophantine equation. This is the most time consuming part.
2059 For answering to the question: "Is there a dependence?" we have
2060 to prove that there exists a solution to the Diophantine
2061 equation, and that the solution is in the iteration domain,
2062 i.e. the solution is positive or zero, and that the solution
2063 happens before the upper bound loop.nb_iterations. Otherwise
2064 there is no dependence. This function outputs a description of
2065 the iterations that hold the intersections. */
2067 nb_vars_a = nb_vars_in_chrec (chrec_a);
2068 nb_vars_b = nb_vars_in_chrec (chrec_b);
2070 dim = nb_vars_a + nb_vars_b;
2071 U = lambda_matrix_new (dim, dim);
2072 A = lambda_matrix_new (dim, 1);
2073 S = lambda_matrix_new (dim, 1);
2075 init_a = initialize_matrix_A (A, chrec_a, 0, 1);
2076 init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
2077 gamma = init_b - init_a;
2079 /* Don't do all the hard work of solving the Diophantine equation
2080 when we already know the solution: for example,
2083 | gamma = 3 - 3 = 0.
2084 Then the first overlap occurs during the first iterations:
2085 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2089 if (nb_vars_a == 1 && nb_vars_b == 1)
2091 HOST_WIDE_INT step_a, step_b;
2092 HOST_WIDE_INT niter, niter_a, niter_b;
2095 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2097 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2099 niter = MIN (niter_a, niter_b);
2100 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2101 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2103 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2106 *overlaps_a = conflict_fn (1, ova);
2107 *overlaps_b = conflict_fn (1, ovb);
2110 else if (nb_vars_a == 2 && nb_vars_b == 1)
2111 compute_overlap_steps_for_affine_1_2
2112 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2114 else if (nb_vars_a == 1 && nb_vars_b == 2)
2115 compute_overlap_steps_for_affine_1_2
2116 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2120 if (dump_file && (dump_flags & TDF_DETAILS))
2121 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2122 *overlaps_a = conflict_fn_not_known ();
2123 *overlaps_b = conflict_fn_not_known ();
2124 *last_conflicts = chrec_dont_know;
2126 goto end_analyze_subs_aa;
2130 lambda_matrix_right_hermite (A, dim, 1, S, U);
2135 lambda_matrix_row_negate (U, dim, 0);
2137 gcd_alpha_beta = S[0][0];
2139 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2140 but that is a quite strange case. Instead of ICEing, answer
2142 if (gcd_alpha_beta == 0)
2144 *overlaps_a = conflict_fn_not_known ();
2145 *overlaps_b = conflict_fn_not_known ();
2146 *last_conflicts = chrec_dont_know;
2147 goto end_analyze_subs_aa;
2150 /* The classic "gcd-test". */
2151 if (!int_divides_p (gcd_alpha_beta, gamma))
2153 /* The "gcd-test" has determined that there is no integer
2154 solution, i.e. there is no dependence. */
2155 *overlaps_a = conflict_fn_no_dependence ();
2156 *overlaps_b = conflict_fn_no_dependence ();
2157 *last_conflicts = integer_zero_node;
2160 /* Both access functions are univariate. This includes SIV and MIV cases. */
2161 else if (nb_vars_a == 1 && nb_vars_b == 1)
2163 /* Both functions should have the same evolution sign. */
2164 if (((A[0][0] > 0 && -A[1][0] > 0)
2165 || (A[0][0] < 0 && -A[1][0] < 0)))
2167 /* The solutions are given by:
2169 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2172 For a given integer t. Using the following variables,
2174 | i0 = u11 * gamma / gcd_alpha_beta
2175 | j0 = u12 * gamma / gcd_alpha_beta
2182 | y0 = j0 + j1 * t. */
2183 HOST_WIDE_INT i0, j0, i1, j1;
2185 i0 = U[0][0] * gamma / gcd_alpha_beta;
2186 j0 = U[0][1] * gamma / gcd_alpha_beta;
2190 if ((i1 == 0 && i0 < 0)
2191 || (j1 == 0 && j0 < 0))
2193 /* There is no solution.
2194 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2195 falls in here, but for the moment we don't look at the
2196 upper bound of the iteration domain. */
2197 *overlaps_a = conflict_fn_no_dependence ();
2198 *overlaps_b = conflict_fn_no_dependence ();
2199 *last_conflicts = integer_zero_node;
2200 goto end_analyze_subs_aa;
2203 if (i1 > 0 && j1 > 0)
2205 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2206 (get_chrec_loop (chrec_a), false);
2207 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2208 (get_chrec_loop (chrec_b), false);
2209 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2211 /* (X0, Y0) is a solution of the Diophantine equation:
2212 "chrec_a (X0) = chrec_b (Y0)". */
2213 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2215 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2216 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2218 /* (X1, Y1) is the smallest positive solution of the eq
2219 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2220 first conflict occurs. */
2221 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2222 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2223 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2227 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2228 FLOOR_DIV (niter - j0, j1));
2229 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2231 /* If the overlap occurs outside of the bounds of the
2232 loop, there is no dependence. */
2233 if (x1 > niter || y1 > niter)
2235 *overlaps_a = conflict_fn_no_dependence ();
2236 *overlaps_b = conflict_fn_no_dependence ();
2237 *last_conflicts = integer_zero_node;
2238 goto end_analyze_subs_aa;
2241 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2244 *last_conflicts = chrec_dont_know;
2248 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2250 build_int_cst (NULL_TREE, i1)));
2253 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2255 build_int_cst (NULL_TREE, j1)));
2259 /* FIXME: For the moment, the upper bound of the
2260 iteration domain for i and j is not checked. */
2261 if (dump_file && (dump_flags & TDF_DETAILS))
2262 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2263 *overlaps_a = conflict_fn_not_known ();
2264 *overlaps_b = conflict_fn_not_known ();
2265 *last_conflicts = chrec_dont_know;
2270 if (dump_file && (dump_flags & TDF_DETAILS))
2271 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2272 *overlaps_a = conflict_fn_not_known ();
2273 *overlaps_b = conflict_fn_not_known ();
2274 *last_conflicts = chrec_dont_know;
2279 if (dump_file && (dump_flags & TDF_DETAILS))
2280 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2281 *overlaps_a = conflict_fn_not_known ();
2282 *overlaps_b = conflict_fn_not_known ();
2283 *last_conflicts = chrec_dont_know;
2286 end_analyze_subs_aa:
2287 if (dump_file && (dump_flags & TDF_DETAILS))
2289 fprintf (dump_file, " (overlaps_a = ");
2290 dump_conflict_function (dump_file, *overlaps_a);
2291 fprintf (dump_file, ")\n (overlaps_b = ");
2292 dump_conflict_function (dump_file, *overlaps_b);
2293 fprintf (dump_file, ")\n");
2294 fprintf (dump_file, ")\n");
2298 /* Returns true when analyze_subscript_affine_affine can be used for
2299 determining the dependence relation between chrec_a and chrec_b,
2300 that contain symbols. This function modifies chrec_a and chrec_b
2301 such that the analysis result is the same, and such that they don't
2302 contain symbols, and then can safely be passed to the analyzer.
2304 Example: The analysis of the following tuples of evolutions produce
2305 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2308 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2309 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2313 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2315 tree diff, type, left_a, left_b, right_b;
2317 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2318 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2319 /* FIXME: For the moment not handled. Might be refined later. */
2322 type = chrec_type (*chrec_a);
2323 left_a = CHREC_LEFT (*chrec_a);
2324 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL_TREE);
2325 diff = chrec_fold_minus (type, left_a, left_b);
2327 if (!evolution_function_is_constant_p (diff))
2330 if (dump_file && (dump_flags & TDF_DETAILS))
2331 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2333 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2334 diff, CHREC_RIGHT (*chrec_a));
2335 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL_TREE);
2336 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2337 build_int_cst (type, 0),
2342 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2343 *OVERLAPS_B are initialized to the functions that describe the
2344 relation between the elements accessed twice by CHREC_A and
2345 CHREC_B. For k >= 0, the following property is verified:
2347 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2350 analyze_siv_subscript (tree chrec_a,
2352 conflict_function **overlaps_a,
2353 conflict_function **overlaps_b,
2354 tree *last_conflicts)
2356 dependence_stats.num_siv++;
2358 if (dump_file && (dump_flags & TDF_DETAILS))
2359 fprintf (dump_file, "(analyze_siv_subscript \n");
2361 if (evolution_function_is_constant_p (chrec_a)
2362 && evolution_function_is_affine_p (chrec_b))
2363 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2364 overlaps_a, overlaps_b, last_conflicts);
2366 else if (evolution_function_is_affine_p (chrec_a)
2367 && evolution_function_is_constant_p (chrec_b))
2368 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2369 overlaps_b, overlaps_a, last_conflicts);
2371 else if (evolution_function_is_affine_p (chrec_a)
2372 && evolution_function_is_affine_p (chrec_b))
2374 if (!chrec_contains_symbols (chrec_a)
2375 && !chrec_contains_symbols (chrec_b))
2377 analyze_subscript_affine_affine (chrec_a, chrec_b,
2378 overlaps_a, overlaps_b,
2381 if (CF_NOT_KNOWN_P (*overlaps_a)
2382 || CF_NOT_KNOWN_P (*overlaps_b))
2383 dependence_stats.num_siv_unimplemented++;
2384 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2385 || CF_NO_DEPENDENCE_P (*overlaps_b))
2386 dependence_stats.num_siv_independent++;
2388 dependence_stats.num_siv_dependent++;
2390 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2393 analyze_subscript_affine_affine (chrec_a, chrec_b,
2394 overlaps_a, overlaps_b,
2397 if (CF_NOT_KNOWN_P (*overlaps_a)
2398 || CF_NOT_KNOWN_P (*overlaps_b))
2399 dependence_stats.num_siv_unimplemented++;
2400 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2401 || CF_NO_DEPENDENCE_P (*overlaps_b))
2402 dependence_stats.num_siv_independent++;
2404 dependence_stats.num_siv_dependent++;
2407 goto siv_subscript_dontknow;
2412 siv_subscript_dontknow:;
2413 if (dump_file && (dump_flags & TDF_DETAILS))
2414 fprintf (dump_file, "siv test failed: unimplemented.\n");
2415 *overlaps_a = conflict_fn_not_known ();
2416 *overlaps_b = conflict_fn_not_known ();
2417 *last_conflicts = chrec_dont_know;
2418 dependence_stats.num_siv_unimplemented++;
2421 if (dump_file && (dump_flags & TDF_DETAILS))
2422 fprintf (dump_file, ")\n");
2425 /* Returns false if we can prove that the greatest common divisor of the steps
2426 of CHREC does not divide CST, false otherwise. */
2429 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2431 HOST_WIDE_INT cd = 0, val;
2434 if (!host_integerp (cst, 0))
2436 val = tree_low_cst (cst, 0);
2438 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2440 step = CHREC_RIGHT (chrec);
2441 if (!host_integerp (step, 0))
2443 cd = gcd (cd, tree_low_cst (step, 0));
2444 chrec = CHREC_LEFT (chrec);
2447 return val % cd == 0;
2450 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2451 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2452 functions that describe the relation between the elements accessed
2453 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2456 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2459 analyze_miv_subscript (tree chrec_a,
2461 conflict_function **overlaps_a,
2462 conflict_function **overlaps_b,
2463 tree *last_conflicts,
2464 struct loop *loop_nest)
2466 /* FIXME: This is a MIV subscript, not yet handled.
2467 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2470 In the SIV test we had to solve a Diophantine equation with two
2471 variables. In the MIV case we have to solve a Diophantine
2472 equation with 2*n variables (if the subscript uses n IVs).
2474 tree type, difference;
2476 dependence_stats.num_miv++;
2477 if (dump_file && (dump_flags & TDF_DETAILS))
2478 fprintf (dump_file, "(analyze_miv_subscript \n");
2480 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2481 chrec_a = chrec_convert (type, chrec_a, NULL_TREE);
2482 chrec_b = chrec_convert (type, chrec_b, NULL_TREE);
2483 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2485 if (eq_evolutions_p (chrec_a, chrec_b))
2487 /* Access functions are the same: all the elements are accessed
2488 in the same order. */
2489 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2490 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2491 *last_conflicts = estimated_loop_iterations_tree
2492 (get_chrec_loop (chrec_a), true);
2493 dependence_stats.num_miv_dependent++;
2496 else if (evolution_function_is_constant_p (difference)
2497 /* For the moment, the following is verified:
2498 evolution_function_is_affine_multivariate_p (chrec_a,
2500 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2502 /* testsuite/.../ssa-chrec-33.c
2503 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2505 The difference is 1, and all the evolution steps are multiples
2506 of 2, consequently there are no overlapping elements. */
2507 *overlaps_a = conflict_fn_no_dependence ();
2508 *overlaps_b = conflict_fn_no_dependence ();
2509 *last_conflicts = integer_zero_node;
2510 dependence_stats.num_miv_independent++;
2513 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2514 && !chrec_contains_symbols (chrec_a)
2515 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2516 && !chrec_contains_symbols (chrec_b))
2518 /* testsuite/.../ssa-chrec-35.c
2519 {0, +, 1}_2 vs. {0, +, 1}_3
2520 the overlapping elements are respectively located at iterations:
2521 {0, +, 1}_x and {0, +, 1}_x,
2522 in other words, we have the equality:
2523 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2526 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2527 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2529 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2530 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2532 analyze_subscript_affine_affine (chrec_a, chrec_b,
2533 overlaps_a, overlaps_b, last_conflicts);
2535 if (CF_NOT_KNOWN_P (*overlaps_a)
2536 || CF_NOT_KNOWN_P (*overlaps_b))
2537 dependence_stats.num_miv_unimplemented++;
2538 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2539 || CF_NO_DEPENDENCE_P (*overlaps_b))
2540 dependence_stats.num_miv_independent++;
2542 dependence_stats.num_miv_dependent++;
2547 /* When the analysis is too difficult, answer "don't know". */
2548 if (dump_file && (dump_flags & TDF_DETAILS))
2549 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2551 *overlaps_a = conflict_fn_not_known ();
2552 *overlaps_b = conflict_fn_not_known ();
2553 *last_conflicts = chrec_dont_know;
2554 dependence_stats.num_miv_unimplemented++;
2557 if (dump_file && (dump_flags & TDF_DETAILS))
2558 fprintf (dump_file, ")\n");
2561 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2562 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2563 OVERLAP_ITERATIONS_B are initialized with two functions that
2564 describe the iterations that contain conflicting elements.
2566 Remark: For an integer k >= 0, the following equality is true:
2568 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2572 analyze_overlapping_iterations (tree chrec_a,
2574 conflict_function **overlap_iterations_a,
2575 conflict_function **overlap_iterations_b,
2576 tree *last_conflicts, struct loop *loop_nest)
2578 unsigned int lnn = loop_nest->num;
2580 dependence_stats.num_subscript_tests++;
2582 if (dump_file && (dump_flags & TDF_DETAILS))
2584 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2585 fprintf (dump_file, " (chrec_a = ");
2586 print_generic_expr (dump_file, chrec_a, 0);
2587 fprintf (dump_file, ")\n (chrec_b = ");
2588 print_generic_expr (dump_file, chrec_b, 0);
2589 fprintf (dump_file, ")\n");
2592 if (chrec_a == NULL_TREE
2593 || chrec_b == NULL_TREE
2594 || chrec_contains_undetermined (chrec_a)
2595 || chrec_contains_undetermined (chrec_b))
2597 dependence_stats.num_subscript_undetermined++;
2599 *overlap_iterations_a = conflict_fn_not_known ();
2600 *overlap_iterations_b = conflict_fn_not_known ();
2603 /* If they are the same chrec, and are affine, they overlap
2604 on every iteration. */
2605 else if (eq_evolutions_p (chrec_a, chrec_b)
2606 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2608 dependence_stats.num_same_subscript_function++;
2609 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2610 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2611 *last_conflicts = chrec_dont_know;
2614 /* If they aren't the same, and aren't affine, we can't do anything
2616 else if ((chrec_contains_symbols (chrec_a)
2617 || chrec_contains_symbols (chrec_b))
2618 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2619 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2621 dependence_stats.num_subscript_undetermined++;
2622 *overlap_iterations_a = conflict_fn_not_known ();
2623 *overlap_iterations_b = conflict_fn_not_known ();
2626 else if (ziv_subscript_p (chrec_a, chrec_b))
2627 analyze_ziv_subscript (chrec_a, chrec_b,
2628 overlap_iterations_a, overlap_iterations_b,
2631 else if (siv_subscript_p (chrec_a, chrec_b))
2632 analyze_siv_subscript (chrec_a, chrec_b,
2633 overlap_iterations_a, overlap_iterations_b,
2637 analyze_miv_subscript (chrec_a, chrec_b,
2638 overlap_iterations_a, overlap_iterations_b,
2639 last_conflicts, loop_nest);
2641 if (dump_file && (dump_flags & TDF_DETAILS))
2643 fprintf (dump_file, " (overlap_iterations_a = ");
2644 dump_conflict_function (dump_file, *overlap_iterations_a);
2645 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2646 dump_conflict_function (dump_file, *overlap_iterations_b);
2647 fprintf (dump_file, ")\n");
2648 fprintf (dump_file, ")\n");
2652 /* Helper function for uniquely inserting distance vectors. */
2655 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2660 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2661 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2664 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2667 /* Helper function for uniquely inserting direction vectors. */
2670 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2675 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2676 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2679 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2682 /* Add a distance of 1 on all the loops outer than INDEX. If we
2683 haven't yet determined a distance for this outer loop, push a new
2684 distance vector composed of the previous distance, and a distance
2685 of 1 for this outer loop. Example:
2693 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2694 save (0, 1), then we have to save (1, 0). */
2697 add_outer_distances (struct data_dependence_relation *ddr,
2698 lambda_vector dist_v, int index)
2700 /* For each outer loop where init_v is not set, the accesses are
2701 in dependence of distance 1 in the loop. */
2702 while (--index >= 0)
2704 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2705 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2707 save_dist_v (ddr, save_v);
2711 /* Return false when fail to represent the data dependence as a
2712 distance vector. INIT_B is set to true when a component has been
2713 added to the distance vector DIST_V. INDEX_CARRY is then set to
2714 the index in DIST_V that carries the dependence. */
2717 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2718 struct data_reference *ddr_a,
2719 struct data_reference *ddr_b,
2720 lambda_vector dist_v, bool *init_b,
2724 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2726 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2728 tree access_fn_a, access_fn_b;
2729 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2731 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2733 non_affine_dependence_relation (ddr);
2737 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2738 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2740 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2741 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2744 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2745 DDR_LOOP_NEST (ddr));
2746 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2747 DDR_LOOP_NEST (ddr));
2749 /* The dependence is carried by the outermost loop. Example:
2756 In this case, the dependence is carried by loop_1. */
2757 index = index_a < index_b ? index_a : index_b;
2758 *index_carry = MIN (index, *index_carry);
2760 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2762 non_affine_dependence_relation (ddr);
2766 dist = int_cst_value (SUB_DISTANCE (subscript));
2768 /* This is the subscript coupling test. If we have already
2769 recorded a distance for this loop (a distance coming from
2770 another subscript), it should be the same. For example,
2771 in the following code, there is no dependence:
2778 if (init_v[index] != 0 && dist_v[index] != dist)
2780 finalize_ddr_dependent (ddr, chrec_known);
2784 dist_v[index] = dist;
2788 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2790 /* This can be for example an affine vs. constant dependence
2791 (T[i] vs. T[3]) that is not an affine dependence and is
2792 not representable as a distance vector. */
2793 non_affine_dependence_relation (ddr);
2801 /* Return true when the DDR contains only constant access functions. */
2804 constant_access_functions (const struct data_dependence_relation *ddr)
2808 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2809 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2810 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2816 /* Helper function for the case where DDR_A and DDR_B are the same
2817 multivariate access function with a constant step. For an example
2821 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2824 tree c_1 = CHREC_LEFT (c_2);
2825 tree c_0 = CHREC_LEFT (c_1);
2826 lambda_vector dist_v;
2829 /* Polynomials with more than 2 variables are not handled yet. When
2830 the evolution steps are parameters, it is not possible to
2831 represent the dependence using classical distance vectors. */
2832 if (TREE_CODE (c_0) != INTEGER_CST
2833 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2834 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2836 DDR_AFFINE_P (ddr) = false;
2840 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2841 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2843 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2844 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2845 v1 = int_cst_value (CHREC_RIGHT (c_1));
2846 v2 = int_cst_value (CHREC_RIGHT (c_2));
2859 save_dist_v (ddr, dist_v);
2861 add_outer_distances (ddr, dist_v, x_1);
2864 /* Helper function for the case where DDR_A and DDR_B are the same
2865 access functions. */
2868 add_other_self_distances (struct data_dependence_relation *ddr)
2870 lambda_vector dist_v;
2872 int index_carry = DDR_NB_LOOPS (ddr);
2874 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2876 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2878 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2880 if (!evolution_function_is_univariate_p (access_fun))
2882 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2884 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2888 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
2890 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
2891 add_multivariate_self_dist (ddr, access_fun);
2893 /* The evolution step is not constant: it varies in
2894 the outer loop, so this cannot be represented by a
2895 distance vector. For example in pr34635.c the
2896 evolution is {0, +, {0, +, 4}_1}_2. */
2897 DDR_AFFINE_P (ddr) = false;
2902 index_carry = MIN (index_carry,
2903 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2904 DDR_LOOP_NEST (ddr)));
2908 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2909 add_outer_distances (ddr, dist_v, index_carry);
2913 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2915 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2917 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2918 save_dist_v (ddr, dist_v);
2921 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2922 is the case for example when access functions are the same and
2923 equal to a constant, as in:
2930 in which case the distance vectors are (0) and (1). */
2933 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2937 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2939 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2940 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2941 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2943 for (j = 0; j < ca->n; j++)
2944 if (affine_function_zero_p (ca->fns[j]))
2946 insert_innermost_unit_dist_vector (ddr);
2950 for (j = 0; j < cb->n; j++)
2951 if (affine_function_zero_p (cb->fns[j]))
2953 insert_innermost_unit_dist_vector (ddr);
2959 /* Compute the classic per loop distance vector. DDR is the data
2960 dependence relation to build a vector from. Return false when fail
2961 to represent the data dependence as a distance vector. */
2964 build_classic_dist_vector (struct data_dependence_relation *ddr,
2965 struct loop *loop_nest)
2967 bool init_b = false;
2968 int index_carry = DDR_NB_LOOPS (ddr);
2969 lambda_vector dist_v;
2971 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
2974 if (same_access_functions (ddr))
2976 /* Save the 0 vector. */
2977 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2978 save_dist_v (ddr, dist_v);
2980 if (constant_access_functions (ddr))
2981 add_distance_for_zero_overlaps (ddr);
2983 if (DDR_NB_LOOPS (ddr) > 1)
2984 add_other_self_distances (ddr);
2989 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2990 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
2991 dist_v, &init_b, &index_carry))
2994 /* Save the distance vector if we initialized one. */
2997 /* Verify a basic constraint: classic distance vectors should
2998 always be lexicographically positive.
3000 Data references are collected in the order of execution of
3001 the program, thus for the following loop
3003 | for (i = 1; i < 100; i++)
3004 | for (j = 1; j < 100; j++)
3006 | t = T[j+1][i-1]; // A
3007 | T[j][i] = t + 2; // B
3010 references are collected following the direction of the wind:
3011 A then B. The data dependence tests are performed also
3012 following this order, such that we're looking at the distance
3013 separating the elements accessed by A from the elements later
3014 accessed by B. But in this example, the distance returned by
3015 test_dep (A, B) is lexicographically negative (-1, 1), that
3016 means that the access A occurs later than B with respect to
3017 the outer loop, ie. we're actually looking upwind. In this
3018 case we solve test_dep (B, A) looking downwind to the
3019 lexicographically positive solution, that returns the
3020 distance vector (1, -1). */
3021 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3023 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3024 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3027 compute_subscript_distance (ddr);
3028 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3029 save_v, &init_b, &index_carry))
3031 save_dist_v (ddr, save_v);
3032 DDR_REVERSED_P (ddr) = true;
3034 /* In this case there is a dependence forward for all the
3037 | for (k = 1; k < 100; k++)
3038 | for (i = 1; i < 100; i++)
3039 | for (j = 1; j < 100; j++)
3041 | t = T[j+1][i-1]; // A
3042 | T[j][i] = t + 2; // B
3050 if (DDR_NB_LOOPS (ddr) > 1)
3052 add_outer_distances (ddr, save_v, index_carry);
3053 add_outer_distances (ddr, dist_v, index_carry);
3058 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3059 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3061 if (DDR_NB_LOOPS (ddr) > 1)
3063 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3065 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3066 DDR_A (ddr), loop_nest))
3068 compute_subscript_distance (ddr);
3069 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3070 opposite_v, &init_b,
3074 save_dist_v (ddr, save_v);
3075 add_outer_distances (ddr, dist_v, index_carry);
3076 add_outer_distances (ddr, opposite_v, index_carry);
3079 save_dist_v (ddr, save_v);
3084 /* There is a distance of 1 on all the outer loops: Example:
3085 there is a dependence of distance 1 on loop_1 for the array A.
3091 add_outer_distances (ddr, dist_v,
3092 lambda_vector_first_nz (dist_v,
3093 DDR_NB_LOOPS (ddr), 0));
3096 if (dump_file && (dump_flags & TDF_DETAILS))
3100 fprintf (dump_file, "(build_classic_dist_vector\n");
3101 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3103 fprintf (dump_file, " dist_vector = (");
3104 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3105 DDR_NB_LOOPS (ddr));
3106 fprintf (dump_file, " )\n");
3108 fprintf (dump_file, ")\n");
3114 /* Return the direction for a given distance.
3115 FIXME: Computing dir this way is suboptimal, since dir can catch
3116 cases that dist is unable to represent. */
3118 static inline enum data_dependence_direction
3119 dir_from_dist (int dist)
3122 return dir_positive;
3124 return dir_negative;
3129 /* Compute the classic per loop direction vector. DDR is the data
3130 dependence relation to build a vector from. */
3133 build_classic_dir_vector (struct data_dependence_relation *ddr)
3136 lambda_vector dist_v;
3138 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3140 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3142 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3143 dir_v[j] = dir_from_dist (dist_v[j]);
3145 save_dir_v (ddr, dir_v);
3149 /* Helper function. Returns true when there is a dependence between
3150 data references DRA and DRB. */
3153 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3154 struct data_reference *dra,
3155 struct data_reference *drb,
3156 struct loop *loop_nest)
3159 tree last_conflicts;
3160 struct subscript *subscript;
3162 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3165 conflict_function *overlaps_a, *overlaps_b;
3167 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3168 DR_ACCESS_FN (drb, i),
3169 &overlaps_a, &overlaps_b,
3170 &last_conflicts, loop_nest);
3172 if (CF_NOT_KNOWN_P (overlaps_a)
3173 || CF_NOT_KNOWN_P (overlaps_b))
3175 finalize_ddr_dependent (ddr, chrec_dont_know);
3176 dependence_stats.num_dependence_undetermined++;
3177 free_conflict_function (overlaps_a);
3178 free_conflict_function (overlaps_b);
3182 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3183 || CF_NO_DEPENDENCE_P (overlaps_b))
3185 finalize_ddr_dependent (ddr, chrec_known);
3186 dependence_stats.num_dependence_independent++;
3187 free_conflict_function (overlaps_a);
3188 free_conflict_function (overlaps_b);
3194 if (SUB_CONFLICTS_IN_A (subscript))
3195 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3196 if (SUB_CONFLICTS_IN_B (subscript))
3197 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3199 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3200 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3201 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3208 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3211 subscript_dependence_tester (struct data_dependence_relation *ddr,
3212 struct loop *loop_nest)
3215 if (dump_file && (dump_flags & TDF_DETAILS))
3216 fprintf (dump_file, "(subscript_dependence_tester \n");
3218 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3219 dependence_stats.num_dependence_dependent++;
3221 compute_subscript_distance (ddr);
3222 if (build_classic_dist_vector (ddr, loop_nest))
3223 build_classic_dir_vector (ddr);
3225 if (dump_file && (dump_flags & TDF_DETAILS))
3226 fprintf (dump_file, ")\n");
3229 /* Returns true when all the access functions of A are affine or
3230 constant with respect to LOOP_NEST. */
3233 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3234 const struct loop *loop_nest)
3237 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3240 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3241 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3242 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3248 /* Initializes an equation for an OMEGA problem using the information
3249 contained in the ACCESS_FUN. Returns true when the operation
3252 PB is the omega constraint system.
3253 EQ is the number of the equation to be initialized.
3254 OFFSET is used for shifting the variables names in the constraints:
3255 a constrain is composed of 2 * the number of variables surrounding
3256 dependence accesses. OFFSET is set either to 0 for the first n variables,
3257 then it is set to n.
3258 ACCESS_FUN is expected to be an affine chrec. */
3261 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3262 unsigned int offset, tree access_fun,
3263 struct data_dependence_relation *ddr)
3265 switch (TREE_CODE (access_fun))
3267 case POLYNOMIAL_CHREC:
3269 tree left = CHREC_LEFT (access_fun);
3270 tree right = CHREC_RIGHT (access_fun);
3271 int var = CHREC_VARIABLE (access_fun);
3274 if (TREE_CODE (right) != INTEGER_CST)
3277 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3278 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3280 /* Compute the innermost loop index. */
3281 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3284 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3285 += int_cst_value (right);
3287 switch (TREE_CODE (left))
3289 case POLYNOMIAL_CHREC:
3290 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3293 pb->eqs[eq].coef[0] += int_cst_value (left);
3302 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3310 /* As explained in the comments preceding init_omega_for_ddr, we have
3311 to set up a system for each loop level, setting outer loops
3312 variation to zero, and current loop variation to positive or zero.
3313 Save each lexico positive distance vector. */
3316 omega_extract_distance_vectors (omega_pb pb,
3317 struct data_dependence_relation *ddr)
3321 struct loop *loopi, *loopj;
3322 enum omega_result res;
3324 /* Set a new problem for each loop in the nest. The basis is the
3325 problem that we have initialized until now. On top of this we
3326 add new constraints. */
3327 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3328 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3331 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3332 DDR_NB_LOOPS (ddr));
3334 omega_copy_problem (copy, pb);
3336 /* For all the outer loops "loop_j", add "dj = 0". */
3338 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3340 eq = omega_add_zero_eq (copy, omega_black);
3341 copy->eqs[eq].coef[j + 1] = 1;
3344 /* For "loop_i", add "0 <= di". */
3345 geq = omega_add_zero_geq (copy, omega_black);
3346 copy->geqs[geq].coef[i + 1] = 1;
3348 /* Reduce the constraint system, and test that the current
3349 problem is feasible. */
3350 res = omega_simplify_problem (copy);
3351 if (res == omega_false
3352 || res == omega_unknown
3353 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3356 for (eq = 0; eq < copy->num_subs; eq++)
3357 if (copy->subs[eq].key == (int) i + 1)
3359 dist = copy->subs[eq].coef[0];
3365 /* Reinitialize problem... */
3366 omega_copy_problem (copy, pb);
3368 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3370 eq = omega_add_zero_eq (copy, omega_black);
3371 copy->eqs[eq].coef[j + 1] = 1;
3374 /* ..., but this time "di = 1". */
3375 eq = omega_add_zero_eq (copy, omega_black);
3376 copy->eqs[eq].coef[i + 1] = 1;
3377 copy->eqs[eq].coef[0] = -1;
3379 res = omega_simplify_problem (copy);
3380 if (res == omega_false
3381 || res == omega_unknown
3382 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3385 for (eq = 0; eq < copy->num_subs; eq++)
3386 if (copy->subs[eq].key == (int) i + 1)
3388 dist = copy->subs[eq].coef[0];
3394 /* Save the lexicographically positive distance vector. */
3397 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3398 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3402 for (eq = 0; eq < copy->num_subs; eq++)
3403 if (copy->subs[eq].key > 0)
3405 dist = copy->subs[eq].coef[0];
3406 dist_v[copy->subs[eq].key - 1] = dist;
3409 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3410 dir_v[j] = dir_from_dist (dist_v[j]);
3412 save_dist_v (ddr, dist_v);
3413 save_dir_v (ddr, dir_v);
3417 omega_free_problem (copy);
3421 /* This is called for each subscript of a tuple of data references:
3422 insert an equality for representing the conflicts. */
3425 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3426 struct data_dependence_relation *ddr,
3427 omega_pb pb, bool *maybe_dependent)
3430 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3431 TREE_TYPE (access_fun_b));
3432 tree fun_a = chrec_convert (type, access_fun_a, NULL_TREE);
3433 tree fun_b = chrec_convert (type, access_fun_b, NULL_TREE);
3434 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3436 /* When the fun_a - fun_b is not constant, the dependence is not
3437 captured by the classic distance vector representation. */
3438 if (TREE_CODE (difference) != INTEGER_CST)
3442 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3444 /* There is no dependence. */
3445 *maybe_dependent = false;
3449 fun_b = chrec_fold_multiply (type, fun_b, integer_minus_one_node);
3451 eq = omega_add_zero_eq (pb, omega_black);
3452 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3453 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3454 /* There is probably a dependence, but the system of
3455 constraints cannot be built: answer "don't know". */
3459 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3460 && !int_divides_p (lambda_vector_gcd
3461 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3462 2 * DDR_NB_LOOPS (ddr)),
3463 pb->eqs[eq].coef[0]))
3465 /* There is no dependence. */
3466 *maybe_dependent = false;
3473 /* Helper function, same as init_omega_for_ddr but specialized for
3474 data references A and B. */
3477 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3478 struct data_dependence_relation *ddr,
3479 omega_pb pb, bool *maybe_dependent)
3484 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3486 /* Insert an equality per subscript. */
3487 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3489 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3490 ddr, pb, maybe_dependent))
3492 else if (*maybe_dependent == false)
3494 /* There is no dependence. */
3495 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3500 /* Insert inequalities: constraints corresponding to the iteration
3501 domain, i.e. the loops surrounding the references "loop_x" and
3502 the distance variables "dx". The layout of the OMEGA
3503 representation is as follows:
3504 - coef[0] is the constant
3505 - coef[1..nb_loops] are the protected variables that will not be
3506 removed by the solver: the "dx"
3507 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3509 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3510 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3512 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3515 ineq = omega_add_zero_geq (pb, omega_black);
3516 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3518 /* 0 <= loop_x + dx */
3519 ineq = omega_add_zero_geq (pb, omega_black);
3520 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3521 pb->geqs[ineq].coef[i + 1] = 1;
3525 /* loop_x <= nb_iters */
3526 ineq = omega_add_zero_geq (pb, omega_black);
3527 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3528 pb->geqs[ineq].coef[0] = nbi;
3530 /* loop_x + dx <= nb_iters */
3531 ineq = omega_add_zero_geq (pb, omega_black);
3532 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3533 pb->geqs[ineq].coef[i + 1] = -1;
3534 pb->geqs[ineq].coef[0] = nbi;
3536 /* A step "dx" bigger than nb_iters is not feasible, so
3537 add "0 <= nb_iters + dx", */
3538 ineq = omega_add_zero_geq (pb, omega_black);
3539 pb->geqs[ineq].coef[i + 1] = 1;
3540 pb->geqs[ineq].coef[0] = nbi;
3541 /* and "dx <= nb_iters". */
3542 ineq = omega_add_zero_geq (pb, omega_black);
3543 pb->geqs[ineq].coef[i + 1] = -1;
3544 pb->geqs[ineq].coef[0] = nbi;
3548 omega_extract_distance_vectors (pb, ddr);
3553 /* Sets up the Omega dependence problem for the data dependence
3554 relation DDR. Returns false when the constraint system cannot be
3555 built, ie. when the test answers "don't know". Returns true
3556 otherwise, and when independence has been proved (using one of the
3557 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3558 set MAYBE_DEPENDENT to true.
3560 Example: for setting up the dependence system corresponding to the
3561 conflicting accesses
3566 | ... A[2*j, 2*(i + j)]
3570 the following constraints come from the iteration domain:
3577 where di, dj are the distance variables. The constraints
3578 representing the conflicting elements are:
3581 i + 1 = 2 * (i + di + j + dj)
3583 For asking that the resulting distance vector (di, dj) be
3584 lexicographically positive, we insert the constraint "di >= 0". If
3585 "di = 0" in the solution, we fix that component to zero, and we
3586 look at the inner loops: we set a new problem where all the outer
3587 loop distances are zero, and fix this inner component to be
3588 positive. When one of the components is positive, we save that
3589 distance, and set a new problem where the distance on this loop is
3590 zero, searching for other distances in the inner loops. Here is
3591 the classic example that illustrates that we have to set for each
3592 inner loop a new problem:
3600 we have to save two distances (1, 0) and (0, 1).
3602 Given two array references, refA and refB, we have to set the
3603 dependence problem twice, refA vs. refB and refB vs. refA, and we
3604 cannot do a single test, as refB might occur before refA in the
3605 inner loops, and the contrary when considering outer loops: ex.
3610 | T[{1,+,1}_2][{1,+,1}_1] // refA
3611 | T[{2,+,1}_2][{0,+,1}_1] // refB
3616 refB touches the elements in T before refA, and thus for the same
3617 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3618 but for successive loop_0 iterations, we have (1, -1, 1)
3620 The Omega solver expects the distance variables ("di" in the
3621 previous example) to come first in the constraint system (as
3622 variables to be protected, or "safe" variables), the constraint
3623 system is built using the following layout:
3625 "cst | distance vars | index vars".
3629 init_omega_for_ddr (struct data_dependence_relation *ddr,
3630 bool *maybe_dependent)
3635 *maybe_dependent = true;
3637 if (same_access_functions (ddr))
3640 lambda_vector dir_v;
3642 /* Save the 0 vector. */
3643 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3644 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3645 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3646 dir_v[j] = dir_equal;
3647 save_dir_v (ddr, dir_v);
3649 /* Save the dependences carried by outer loops. */
3650 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3651 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3653 omega_free_problem (pb);
3657 /* Omega expects the protected variables (those that have to be kept
3658 after elimination) to appear first in the constraint system.
3659 These variables are the distance variables. In the following
3660 initialization we declare NB_LOOPS safe variables, and the total
3661 number of variables for the constraint system is 2*NB_LOOPS. */
3662 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3663 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3665 omega_free_problem (pb);
3667 /* Stop computation if not decidable, or no dependence. */
3668 if (res == false || *maybe_dependent == false)
3671 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3672 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3674 omega_free_problem (pb);
3679 /* Return true when DDR contains the same information as that stored
3680 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3683 ddr_consistent_p (FILE *file,
3684 struct data_dependence_relation *ddr,
3685 VEC (lambda_vector, heap) *dist_vects,
3686 VEC (lambda_vector, heap) *dir_vects)
3690 /* If dump_file is set, output there. */
3691 if (dump_file && (dump_flags & TDF_DETAILS))
3694 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3696 lambda_vector b_dist_v;
3697 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3698 VEC_length (lambda_vector, dist_vects),
3699 DDR_NUM_DIST_VECTS (ddr));
3701 fprintf (file, "Banerjee dist vectors:\n");
3702 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3703 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3705 fprintf (file, "Omega dist vectors:\n");
3706 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3707 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3709 fprintf (file, "data dependence relation:\n");
3710 dump_data_dependence_relation (file, ddr);
3712 fprintf (file, ")\n");
3716 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3718 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3719 VEC_length (lambda_vector, dir_vects),
3720 DDR_NUM_DIR_VECTS (ddr));
3724 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3726 lambda_vector a_dist_v;
3727 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3729 /* Distance vectors are not ordered in the same way in the DDR
3730 and in the DIST_VECTS: search for a matching vector. */
3731 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3732 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3735 if (j == VEC_length (lambda_vector, dist_vects))
3737 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3738 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3739 fprintf (file, "not found in Omega dist vectors:\n");
3740 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3741 fprintf (file, "data dependence relation:\n");
3742 dump_data_dependence_relation (file, ddr);
3743 fprintf (file, ")\n");
3747 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3749 lambda_vector a_dir_v;
3750 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3752 /* Direction vectors are not ordered in the same way in the DDR
3753 and in the DIR_VECTS: search for a matching vector. */
3754 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3755 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3758 if (j == VEC_length (lambda_vector, dist_vects))
3760 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3761 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3762 fprintf (file, "not found in Omega dir vectors:\n");
3763 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3764 fprintf (file, "data dependence relation:\n");
3765 dump_data_dependence_relation (file, ddr);
3766 fprintf (file, ")\n");
3773 /* This computes the affine dependence relation between A and B with
3774 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3775 independence between two accesses, while CHREC_DONT_KNOW is used
3776 for representing the unknown relation.
3778 Note that it is possible to stop the computation of the dependence
3779 relation the first time we detect a CHREC_KNOWN element for a given
3783 compute_affine_dependence (struct data_dependence_relation *ddr,
3784 struct loop *loop_nest)
3786 struct data_reference *dra = DDR_A (ddr);
3787 struct data_reference *drb = DDR_B (ddr);
3789 if (dump_file && (dump_flags & TDF_DETAILS))
3791 fprintf (dump_file, "(compute_affine_dependence\n");
3792 fprintf (dump_file, " (stmt_a = \n");
3793 print_generic_expr (dump_file, DR_STMT (dra), 0);
3794 fprintf (dump_file, ")\n (stmt_b = \n");
3795 print_generic_expr (dump_file, DR_STMT (drb), 0);
3796 fprintf (dump_file, ")\n");
3799 /* Analyze only when the dependence relation is not yet known. */
3800 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3802 dependence_stats.num_dependence_tests++;
3804 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3805 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3807 if (flag_check_data_deps)
3809 /* Compute the dependences using the first algorithm. */
3810 subscript_dependence_tester (ddr, loop_nest);
3812 if (dump_file && (dump_flags & TDF_DETAILS))
3814 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3815 dump_data_dependence_relation (dump_file, ddr);
3818 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3820 bool maybe_dependent;
3821 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3823 /* Save the result of the first DD analyzer. */
3824 dist_vects = DDR_DIST_VECTS (ddr);
3825 dir_vects = DDR_DIR_VECTS (ddr);
3827 /* Reset the information. */
3828 DDR_DIST_VECTS (ddr) = NULL;
3829 DDR_DIR_VECTS (ddr) = NULL;
3831 /* Compute the same information using Omega. */
3832 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3833 goto csys_dont_know;
3835 if (dump_file && (dump_flags & TDF_DETAILS))
3837 fprintf (dump_file, "Omega Analyzer\n");
3838 dump_data_dependence_relation (dump_file, ddr);
3841 /* Check that we get the same information. */
3842 if (maybe_dependent)
3843 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3848 subscript_dependence_tester (ddr, loop_nest);
3851 /* As a last case, if the dependence cannot be determined, or if
3852 the dependence is considered too difficult to determine, answer
3857 dependence_stats.num_dependence_undetermined++;
3859 if (dump_file && (dump_flags & TDF_DETAILS))
3861 fprintf (dump_file, "Data ref a:\n");
3862 dump_data_reference (dump_file, dra);
3863 fprintf (dump_file, "Data ref b:\n");
3864 dump_data_reference (dump_file, drb);
3865 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3867 finalize_ddr_dependent (ddr, chrec_dont_know);
3871 if (dump_file && (dump_flags & TDF_DETAILS))
3872 fprintf (dump_file, ")\n");
3875 /* This computes the dependence relation for the same data
3876 reference into DDR. */
3879 compute_self_dependence (struct data_dependence_relation *ddr)
3882 struct subscript *subscript;
3884 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3887 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3890 if (SUB_CONFLICTS_IN_A (subscript))
3891 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3892 if (SUB_CONFLICTS_IN_B (subscript))
3893 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3895 /* The accessed index overlaps for each iteration. */
3896 SUB_CONFLICTS_IN_A (subscript)
3897 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3898 SUB_CONFLICTS_IN_B (subscript)
3899 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3900 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3903 /* The distance vector is the zero vector. */
3904 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3905 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3908 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3909 the data references in DATAREFS, in the LOOP_NEST. When
3910 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3914 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3915 VEC (ddr_p, heap) **dependence_relations,
3916 VEC (loop_p, heap) *loop_nest,
3917 bool compute_self_and_rr)
3919 struct data_dependence_relation *ddr;
3920 struct data_reference *a, *b;
3923 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3924 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3925 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
3927 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3928 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3929 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
3932 if (compute_self_and_rr)
3933 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3935 ddr = initialize_data_dependence_relation (a, a, loop_nest);
3936 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3937 compute_self_dependence (ddr);
3941 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3942 true if STMT clobbers memory, false otherwise. */
3945 get_references_in_stmt (tree stmt, VEC (data_ref_loc, heap) **references)
3947 bool clobbers_memory = false;
3949 tree *op0, *op1, call;
3953 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
3954 Calls have side-effects, except those to const or pure
3956 call = get_call_expr_in (stmt);
3958 && !(call_expr_flags (call) & (ECF_CONST | ECF_PURE)))
3959 || (TREE_CODE (stmt) == ASM_EXPR
3960 && ASM_VOLATILE_P (stmt)))
3961 clobbers_memory = true;
3963 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
3964 return clobbers_memory;
3966 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
3968 op0 = &GIMPLE_STMT_OPERAND (stmt, 0);
3969 op1 = &GIMPLE_STMT_OPERAND (stmt, 1);
3972 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
3974 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3976 ref->is_read = true;
3980 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
3982 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3984 ref->is_read = false;
3990 unsigned i, n = call_expr_nargs (call);
3992 for (i = 0; i < n; i++)
3994 op0 = &CALL_EXPR_ARG (call, i);
3997 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
3999 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4001 ref->is_read = true;
4006 return clobbers_memory;
4009 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4010 reference, returns false, otherwise returns true. NEST is the outermost
4011 loop of the loop nest in that the references should be analyzed. */
4014 find_data_references_in_stmt (struct loop *nest, tree stmt,
4015 VEC (data_reference_p, heap) **datarefs)
4018 VEC (data_ref_loc, heap) *references;
4021 data_reference_p dr;
4023 if (get_references_in_stmt (stmt, &references))
4025 VEC_free (data_ref_loc, heap, references);
4029 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4031 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4032 gcc_assert (dr != NULL);
4034 /* FIXME -- data dependence analysis does not work correctly for objects with
4035 invariant addresses. Let us fail here until the problem is fixed. */
4036 if (dr_address_invariant_p (dr))
4039 if (dump_file && (dump_flags & TDF_DETAILS))
4040 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4045 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4047 VEC_free (data_ref_loc, heap, references);
4051 /* Search the data references in LOOP, and record the information into
4052 DATAREFS. Returns chrec_dont_know when failing to analyze a
4053 difficult case, returns NULL_TREE otherwise.
4055 TODO: This function should be made smarter so that it can handle address
4056 arithmetic as if they were array accesses, etc. */
4059 find_data_references_in_loop (struct loop *loop,
4060 VEC (data_reference_p, heap) **datarefs)
4062 basic_block bb, *bbs;
4064 block_stmt_iterator bsi;
4066 bbs = get_loop_body_in_dom_order (loop);
4068 for (i = 0; i < loop->num_nodes; i++)
4072 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4074 tree stmt = bsi_stmt (bsi);
4076 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4078 struct data_reference *res;
4079 res = XCNEW (struct data_reference);
4080 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4083 return chrec_dont_know;
4092 /* Recursive helper function. */
4095 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4097 /* Inner loops of the nest should not contain siblings. Example:
4098 when there are two consecutive loops,
4109 the dependence relation cannot be captured by the distance
4114 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4116 return find_loop_nest_1 (loop->inner, loop_nest);
4120 /* Return false when the LOOP is not well nested. Otherwise return
4121 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4122 contain the loops from the outermost to the innermost, as they will
4123 appear in the classic distance vector. */
4126 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4128 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4130 return find_loop_nest_1 (loop->inner, loop_nest);
4134 /* Given a loop nest LOOP, the following vectors are returned:
4135 DATAREFS is initialized to all the array elements contained in this loop,
4136 DEPENDENCE_RELATIONS contains the relations between the data references.
4137 Compute read-read and self relations if
4138 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4141 compute_data_dependences_for_loop (struct loop *loop,
4142 bool compute_self_and_read_read_dependences,
4143 VEC (data_reference_p, heap) **datarefs,
4144 VEC (ddr_p, heap) **dependence_relations)
4146 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4148 memset (&dependence_stats, 0, sizeof (dependence_stats));
4150 /* If the loop nest is not well formed, or one of the data references
4151 is not computable, give up without spending time to compute other
4154 || !find_loop_nest (loop, &vloops)
4155 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4157 struct data_dependence_relation *ddr;
4159 /* Insert a single relation into dependence_relations:
4161 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4162 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4165 compute_all_dependences (*datarefs, dependence_relations, vloops,
4166 compute_self_and_read_read_dependences);
4168 if (dump_file && (dump_flags & TDF_STATS))
4170 fprintf (dump_file, "Dependence tester statistics:\n");
4172 fprintf (dump_file, "Number of dependence tests: %d\n",
4173 dependence_stats.num_dependence_tests);
4174 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4175 dependence_stats.num_dependence_dependent);
4176 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4177 dependence_stats.num_dependence_independent);
4178 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4179 dependence_stats.num_dependence_undetermined);
4181 fprintf (dump_file, "Number of subscript tests: %d\n",
4182 dependence_stats.num_subscript_tests);
4183 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4184 dependence_stats.num_subscript_undetermined);
4185 fprintf (dump_file, "Number of same subscript function: %d\n",
4186 dependence_stats.num_same_subscript_function);
4188 fprintf (dump_file, "Number of ziv tests: %d\n",
4189 dependence_stats.num_ziv);
4190 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4191 dependence_stats.num_ziv_dependent);
4192 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4193 dependence_stats.num_ziv_independent);
4194 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4195 dependence_stats.num_ziv_unimplemented);
4197 fprintf (dump_file, "Number of siv tests: %d\n",
4198 dependence_stats.num_siv);
4199 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4200 dependence_stats.num_siv_dependent);
4201 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4202 dependence_stats.num_siv_independent);
4203 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4204 dependence_stats.num_siv_unimplemented);
4206 fprintf (dump_file, "Number of miv tests: %d\n",
4207 dependence_stats.num_miv);
4208 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4209 dependence_stats.num_miv_dependent);
4210 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4211 dependence_stats.num_miv_independent);
4212 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4213 dependence_stats.num_miv_unimplemented);
4217 /* Entry point (for testing only). Analyze all the data references
4218 and the dependence relations in LOOP.
4220 The data references are computed first.
4222 A relation on these nodes is represented by a complete graph. Some
4223 of the relations could be of no interest, thus the relations can be
4226 In the following function we compute all the relations. This is
4227 just a first implementation that is here for:
4228 - for showing how to ask for the dependence relations,
4229 - for the debugging the whole dependence graph,
4230 - for the dejagnu testcases and maintenance.
4232 It is possible to ask only for a part of the graph, avoiding to
4233 compute the whole dependence graph. The computed dependences are
4234 stored in a knowledge base (KB) such that later queries don't
4235 recompute the same information. The implementation of this KB is
4236 transparent to the optimizer, and thus the KB can be changed with a
4237 more efficient implementation, or the KB could be disabled. */
4239 analyze_all_data_dependences (struct loop *loop)
4242 int nb_data_refs = 10;
4243 VEC (data_reference_p, heap) *datarefs =
4244 VEC_alloc (data_reference_p, heap, nb_data_refs);
4245 VEC (ddr_p, heap) *dependence_relations =
4246 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4248 /* Compute DDs on the whole function. */
4249 compute_data_dependences_for_loop (loop, false, &datarefs,
4250 &dependence_relations);
4254 dump_data_dependence_relations (dump_file, dependence_relations);
4255 fprintf (dump_file, "\n\n");
4257 if (dump_flags & TDF_DETAILS)
4258 dump_dist_dir_vectors (dump_file, dependence_relations);
4260 if (dump_flags & TDF_STATS)
4262 unsigned nb_top_relations = 0;
4263 unsigned nb_bot_relations = 0;
4264 unsigned nb_basename_differ = 0;
4265 unsigned nb_chrec_relations = 0;
4266 struct data_dependence_relation *ddr;
4268 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4270 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4273 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4275 struct data_reference *a = DDR_A (ddr);
4276 struct data_reference *b = DDR_B (ddr);
4278 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
4279 nb_basename_differ++;
4285 nb_chrec_relations++;
4288 gather_stats_on_scev_database ();
4292 free_dependence_relations (dependence_relations);
4293 free_data_refs (datarefs);
4296 /* Computes all the data dependences and check that the results of
4297 several analyzers are the same. */
4300 tree_check_data_deps (void)
4303 struct loop *loop_nest;
4305 FOR_EACH_LOOP (li, loop_nest, 0)
4306 analyze_all_data_dependences (loop_nest);
4309 /* Free the memory used by a data dependence relation DDR. */
4312 free_dependence_relation (struct data_dependence_relation *ddr)
4317 if (DDR_SUBSCRIPTS (ddr))
4318 free_subscripts (DDR_SUBSCRIPTS (ddr));
4319 if (DDR_DIST_VECTS (ddr))
4320 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4321 if (DDR_DIR_VECTS (ddr))
4322 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4327 /* Free the memory used by the data dependence relations from
4328 DEPENDENCE_RELATIONS. */
4331 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4334 struct data_dependence_relation *ddr;
4335 VEC (loop_p, heap) *loop_nest = NULL;
4337 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4341 if (loop_nest == NULL)
4342 loop_nest = DDR_LOOP_NEST (ddr);
4344 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4345 || DDR_LOOP_NEST (ddr) == loop_nest);
4346 free_dependence_relation (ddr);
4350 VEC_free (loop_p, heap, loop_nest);
4351 VEC_free (ddr_p, heap, dependence_relations);
4354 /* Free the memory used by the data references from DATAREFS. */
4357 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4360 struct data_reference *dr;
4362 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4364 VEC_free (data_reference_p, heap, datarefs);
4369 /* Dump vertex I in RDG to FILE. */
4372 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4374 struct vertex *v = &(rdg->vertices[i]);
4375 struct graph_edge *e;
4377 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4378 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4379 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4382 for (e = v->pred; e; e = e->pred_next)
4383 fprintf (file, " %d", e->src);
4385 fprintf (file, ") (out:");
4388 for (e = v->succ; e; e = e->succ_next)
4389 fprintf (file, " %d", e->dest);
4391 fprintf (file, ") \n");
4392 print_generic_stmt (file, RDGV_STMT (v), TDF_VOPS|TDF_MEMSYMS);
4393 fprintf (file, ")\n");
4396 /* Call dump_rdg_vertex on stderr. */
4399 debug_rdg_vertex (struct graph *rdg, int i)
4401 dump_rdg_vertex (stderr, rdg, i);
4404 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4405 dumped vertices to that bitmap. */
4407 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4411 fprintf (file, "(%d\n", c);
4413 for (i = 0; i < rdg->n_vertices; i++)
4414 if (rdg->vertices[i].component == c)
4417 bitmap_set_bit (dumped, i);
4419 dump_rdg_vertex (file, rdg, i);
4422 fprintf (file, ")\n");
4425 /* Call dump_rdg_vertex on stderr. */
4428 debug_rdg_component (struct graph *rdg, int c)
4430 dump_rdg_component (stderr, rdg, c, NULL);
4433 /* Dump the reduced dependence graph RDG to FILE. */
4436 dump_rdg (FILE *file, struct graph *rdg)
4439 bitmap dumped = BITMAP_ALLOC (NULL);
4441 fprintf (file, "(rdg\n");
4443 for (i = 0; i < rdg->n_vertices; i++)
4444 if (!bitmap_bit_p (dumped, i))
4445 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4447 fprintf (file, ")\n");
4448 BITMAP_FREE (dumped);
4451 /* Call dump_rdg on stderr. */
4454 debug_rdg (struct graph *rdg)
4456 dump_rdg (stderr, rdg);
4460 dot_rdg_1 (FILE *file, struct graph *rdg)
4464 fprintf (file, "digraph RDG {\n");
4466 for (i = 0; i < rdg->n_vertices; i++)
4468 struct vertex *v = &(rdg->vertices[i]);
4469 struct graph_edge *e;
4471 /* Highlight reads from memory. */
4472 if (RDG_MEM_READS_STMT (rdg, i))
4473 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4475 /* Highlight stores to memory. */
4476 if (RDG_MEM_WRITE_STMT (rdg, i))
4477 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4480 for (e = v->succ; e; e = e->succ_next)
4481 switch (RDGE_TYPE (e))
4484 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4488 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4492 /* These are the most common dependences: don't print these. */
4493 fprintf (file, "%d -> %d \n", i, e->dest);
4497 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4505 fprintf (file, "}\n\n");
4508 /* Display SCOP using dotty. */
4511 dot_rdg (struct graph *rdg)
4513 FILE *file = fopen ("/tmp/rdg.dot", "w");
4514 gcc_assert (file != NULL);
4516 dot_rdg_1 (file, rdg);
4519 system ("dotty /tmp/rdg.dot");
4523 /* This structure is used for recording the mapping statement index in
4526 struct rdg_vertex_info GTY(())
4532 /* Returns the index of STMT in RDG. */
4535 rdg_vertex_for_stmt (struct graph *rdg, tree stmt)
4537 struct rdg_vertex_info rvi, *slot;
4540 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4548 /* Creates an edge in RDG for each distance vector from DDR. The
4549 order that we keep track of in the RDG is the order in which
4550 statements have to be executed. */
4553 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4555 struct graph_edge *e;
4557 data_reference_p dra = DDR_A (ddr);
4558 data_reference_p drb = DDR_B (ddr);
4559 unsigned level = ddr_dependence_level (ddr);
4561 /* For non scalar dependences, when the dependence is REVERSED,
4562 statement B has to be executed before statement A. */
4564 && !DDR_REVERSED_P (ddr))
4566 data_reference_p tmp = dra;
4571 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4572 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4574 if (va < 0 || vb < 0)
4577 e = add_edge (rdg, va, vb);
4578 e->data = XNEW (struct rdg_edge);
4580 RDGE_LEVEL (e) = level;
4582 /* Determines the type of the data dependence. */
4583 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4584 RDGE_TYPE (e) = input_dd;
4585 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4586 RDGE_TYPE (e) = output_dd;
4587 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4588 RDGE_TYPE (e) = flow_dd;
4589 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4590 RDGE_TYPE (e) = anti_dd;
4593 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4594 the index of DEF in RDG. */
4597 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4599 use_operand_p imm_use_p;
4600 imm_use_iterator iterator;
4602 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4604 struct graph_edge *e;
4605 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4610 e = add_edge (rdg, idef, use);
4611 e->data = XNEW (struct rdg_edge);
4612 RDGE_TYPE (e) = flow_dd;
4616 /* Creates the edges of the reduced dependence graph RDG. */
4619 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4622 struct data_dependence_relation *ddr;
4623 def_operand_p def_p;
4626 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4627 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4628 create_rdg_edge_for_ddr (rdg, ddr);
4630 for (i = 0; i < rdg->n_vertices; i++)
4631 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4633 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4636 /* Build the vertices of the reduced dependence graph RDG. */
4639 create_rdg_vertices (struct graph *rdg, VEC (tree, heap) *stmts)
4644 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
4646 VEC (data_ref_loc, heap) *references;
4648 struct vertex *v = &(rdg->vertices[i]);
4649 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4650 struct rdg_vertex_info **slot;
4654 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4661 v->data = XNEW (struct rdg_vertex);
4662 RDG_STMT (rdg, i) = stmt;
4664 RDG_MEM_WRITE_STMT (rdg, i) = false;
4665 RDG_MEM_READS_STMT (rdg, i) = false;
4666 if (TREE_CODE (stmt) == PHI_NODE)
4669 get_references_in_stmt (stmt, &references);
4670 for (j = 0; VEC_iterate (data_ref_loc, references, j, ref); j++)
4672 RDG_MEM_WRITE_STMT (rdg, i) = true;
4674 RDG_MEM_READS_STMT (rdg, i) = true;
4676 VEC_free (data_ref_loc, heap, references);
4680 /* Initialize STMTS with all the statements of LOOP. When
4681 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4682 which we discover statements is important as
4683 generate_loops_for_partition is using the same traversal for
4684 identifying statements. */
4687 stmts_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
4690 basic_block *bbs = get_loop_body_in_dom_order (loop);
4692 for (i = 0; i < loop->num_nodes; i++)
4695 basic_block bb = bbs[i];
4696 block_stmt_iterator bsi;
4698 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
4699 VEC_safe_push (tree, heap, *stmts, phi);
4701 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4702 if (TREE_CODE (stmt = bsi_stmt (bsi)) != LABEL_EXPR)
4703 VEC_safe_push (tree, heap, *stmts, stmt);
4709 /* Returns true when all the dependences are computable. */
4712 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4717 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4718 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4724 /* Computes a hash function for element ELT. */
4727 hash_stmt_vertex_info (const void *elt)
4729 struct rdg_vertex_info *rvi = (struct rdg_vertex_info *) elt;
4730 tree stmt = rvi->stmt;
4732 return htab_hash_pointer (stmt);
4735 /* Compares database elements E1 and E2. */
4738 eq_stmt_vertex_info (const void *e1, const void *e2)
4740 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4741 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4743 return elt1->stmt == elt2->stmt;
4746 /* Free the element E. */
4749 hash_stmt_vertex_del (void *e)
4754 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4755 statement of the loop nest, and one edge per data dependence or
4756 scalar dependence. */
4759 build_rdg (struct loop *loop)
4761 int nb_data_refs = 10;
4762 struct graph *rdg = NULL;
4763 VEC (ddr_p, heap) *dependence_relations;
4764 VEC (data_reference_p, heap) *datarefs;
4765 VEC (tree, heap) *stmts = VEC_alloc (tree, heap, nb_data_refs);
4767 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4768 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4769 compute_data_dependences_for_loop (loop,
4772 &dependence_relations);
4774 if (!known_dependences_p (dependence_relations))
4777 stmts_from_loop (loop, &stmts);
4778 rdg = new_graph (VEC_length (tree, stmts));
4780 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4781 eq_stmt_vertex_info, hash_stmt_vertex_del);
4782 create_rdg_vertices (rdg, stmts);
4783 create_rdg_edges (rdg, dependence_relations);
4786 free_dependence_relations (dependence_relations);
4787 free_data_refs (datarefs);
4788 VEC_free (tree, heap, stmts);
4793 /* Free the reduced dependence graph RDG. */
4796 free_rdg (struct graph *rdg)
4800 for (i = 0; i < rdg->n_vertices; i++)
4801 free (rdg->vertices[i].data);
4803 htab_delete (rdg->indices);
4807 /* Initialize STMTS with all the statements of LOOP that contain a
4811 stores_from_loop (struct loop *loop, VEC (tree, heap) **stmts)
4814 basic_block *bbs = get_loop_body_in_dom_order (loop);
4816 for (i = 0; i < loop->num_nodes; i++)
4818 basic_block bb = bbs[i];
4819 block_stmt_iterator bsi;
4821 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4822 if (!ZERO_SSA_OPERANDS (bsi_stmt (bsi), SSA_OP_VDEF))
4823 VEC_safe_push (tree, heap, *stmts, bsi_stmt (bsi));
4829 /* For a data reference REF, return the declaration of its base
4830 address or NULL_TREE if the base is not determined. */
4833 ref_base_address (tree stmt, data_ref_loc *ref)
4835 tree base = NULL_TREE;
4837 struct data_reference *dr = XCNEW (struct data_reference);
4839 DR_STMT (dr) = stmt;
4840 DR_REF (dr) = *ref->pos;
4841 dr_analyze_innermost (dr);
4842 base_address = DR_BASE_ADDRESS (dr);
4847 switch (TREE_CODE (base_address))
4850 base = TREE_OPERAND (base_address, 0);
4854 base = base_address;
4863 /* Determines whether the statement from vertex V of the RDG has a
4864 definition used outside the loop that contains this statement. */
4867 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
4869 tree stmt = RDG_STMT (rdg, v);
4870 struct loop *loop = loop_containing_stmt (stmt);
4871 use_operand_p imm_use_p;
4872 imm_use_iterator iterator;
4874 def_operand_p def_p;
4879 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
4881 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
4883 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
4891 /* Determines whether statements S1 and S2 access to similar memory
4892 locations. Two memory accesses are considered similar when they
4893 have the same base address declaration, i.e. when their
4894 ref_base_address is the same. */
4897 have_similar_memory_accesses (tree s1, tree s2)
4901 VEC (data_ref_loc, heap) *refs1, *refs2;
4902 data_ref_loc *ref1, *ref2;
4904 get_references_in_stmt (s1, &refs1);
4905 get_references_in_stmt (s2, &refs2);
4907 for (i = 0; VEC_iterate (data_ref_loc, refs1, i, ref1); i++)
4909 tree base1 = ref_base_address (s1, ref1);
4912 for (j = 0; VEC_iterate (data_ref_loc, refs2, j, ref2); j++)
4913 if (base1 == ref_base_address (s2, ref2))
4921 VEC_free (data_ref_loc, heap, refs1);
4922 VEC_free (data_ref_loc, heap, refs2);
4926 /* Helper function for the hashtab. */
4929 have_similar_memory_accesses_1 (const void *s1, const void *s2)
4931 return have_similar_memory_accesses ((tree) s1, (tree) s2);
4934 /* Helper function for the hashtab. */
4937 ref_base_address_1 (const void *s)
4939 tree stmt = (tree) s;
4941 VEC (data_ref_loc, heap) *refs;
4945 get_references_in_stmt (stmt, &refs);
4947 for (i = 0; VEC_iterate (data_ref_loc, refs, i, ref); i++)
4950 res = htab_hash_pointer (ref_base_address (stmt, ref));
4954 VEC_free (data_ref_loc, heap, refs);
4958 /* Try to remove duplicated write data references from STMTS. */
4961 remove_similar_memory_refs (VEC (tree, heap) **stmts)
4965 htab_t seen = htab_create (VEC_length (tree, *stmts), ref_base_address_1,
4966 have_similar_memory_accesses_1, NULL);
4968 for (i = 0; VEC_iterate (tree, *stmts, i, stmt); )
4972 slot = htab_find_slot (seen, stmt, INSERT);
4975 VEC_ordered_remove (tree, *stmts, i);
4978 *slot = (void *) stmt;