1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
50 - to define an interface to access this data.
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
63 has an integer solution x = 1 and y = -1.
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
79 #include "coretypes.h"
84 /* These RTL headers are needed for basic-block.h. */
86 #include "basic-block.h"
87 #include "diagnostic.h"
88 #include "tree-flow.h"
89 #include "tree-dump.h"
92 #include "tree-chrec.h"
93 #include "tree-data-ref.h"
94 #include "tree-scalar-evolution.h"
95 #include "tree-pass.h"
96 #include "langhooks.h"
98 static struct datadep_stats
100 int num_dependence_tests;
101 int num_dependence_dependent;
102 int num_dependence_independent;
103 int num_dependence_undetermined;
105 int num_subscript_tests;
106 int num_subscript_undetermined;
107 int num_same_subscript_function;
110 int num_ziv_independent;
111 int num_ziv_dependent;
112 int num_ziv_unimplemented;
115 int num_siv_independent;
116 int num_siv_dependent;
117 int num_siv_unimplemented;
120 int num_miv_independent;
121 int num_miv_dependent;
122 int num_miv_unimplemented;
125 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
126 struct data_reference *,
127 struct data_reference *);
128 /* Returns true iff A divides B. */
131 tree_fold_divides_p (tree a, tree b)
133 gcc_assert (TREE_CODE (a) == INTEGER_CST);
134 gcc_assert (TREE_CODE (b) == INTEGER_CST);
135 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
138 /* Returns true iff A divides B. */
141 int_divides_p (int a, int b)
143 return ((b % a) == 0);
148 /* Dump into FILE all the data references from DATAREFS. */
151 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
154 struct data_reference *dr;
156 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
157 dump_data_reference (file, dr);
160 /* Dump into FILE all the dependence relations from DDRS. */
163 dump_data_dependence_relations (FILE *file,
164 VEC (ddr_p, heap) *ddrs)
167 struct data_dependence_relation *ddr;
169 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
170 dump_data_dependence_relation (file, ddr);
173 /* Dump function for a DATA_REFERENCE structure. */
176 dump_data_reference (FILE *outf,
177 struct data_reference *dr)
181 fprintf (outf, "(Data Ref: \n stmt: ");
182 print_generic_stmt (outf, DR_STMT (dr), 0);
183 fprintf (outf, " ref: ");
184 print_generic_stmt (outf, DR_REF (dr), 0);
185 fprintf (outf, " base_object: ");
186 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
188 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
190 fprintf (outf, " Access function %d: ", i);
191 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
193 fprintf (outf, ")\n");
196 /* Dumps the affine function described by FN to the file OUTF. */
199 dump_affine_function (FILE *outf, affine_fn fn)
204 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
205 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
207 fprintf (outf, " + ");
208 print_generic_expr (outf, coef, TDF_SLIM);
209 fprintf (outf, " * x_%u", i);
213 /* Dumps the conflict function CF to the file OUTF. */
216 dump_conflict_function (FILE *outf, conflict_function *cf)
220 if (cf->n == NO_DEPENDENCE)
221 fprintf (outf, "no dependence\n");
222 else if (cf->n == NOT_KNOWN)
223 fprintf (outf, "not known\n");
226 for (i = 0; i < cf->n; i++)
229 dump_affine_function (outf, cf->fns[i]);
230 fprintf (outf, "]\n");
235 /* Dump function for a SUBSCRIPT structure. */
238 dump_subscript (FILE *outf, struct subscript *subscript)
240 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
242 fprintf (outf, "\n (subscript \n");
243 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
244 dump_conflict_function (outf, cf);
245 if (CF_NONTRIVIAL_P (cf))
247 tree last_iteration = SUB_LAST_CONFLICT (subscript);
248 fprintf (outf, " last_conflict: ");
249 print_generic_stmt (outf, last_iteration, 0);
252 cf = SUB_CONFLICTS_IN_B (subscript);
253 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
254 dump_conflict_function (outf, cf);
255 if (CF_NONTRIVIAL_P (cf))
257 tree last_iteration = SUB_LAST_CONFLICT (subscript);
258 fprintf (outf, " last_conflict: ");
259 print_generic_stmt (outf, last_iteration, 0);
262 fprintf (outf, " (Subscript distance: ");
263 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
264 fprintf (outf, " )\n");
265 fprintf (outf, " )\n");
268 /* Print the classic direction vector DIRV to OUTF. */
271 print_direction_vector (FILE *outf,
277 for (eq = 0; eq < length; eq++)
279 enum data_dependence_direction dir = dirv[eq];
284 fprintf (outf, " +");
287 fprintf (outf, " -");
290 fprintf (outf, " =");
292 case dir_positive_or_equal:
293 fprintf (outf, " +=");
295 case dir_positive_or_negative:
296 fprintf (outf, " +-");
298 case dir_negative_or_equal:
299 fprintf (outf, " -=");
302 fprintf (outf, " *");
305 fprintf (outf, "indep");
309 fprintf (outf, "\n");
312 /* Print a vector of direction vectors. */
315 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
321 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
322 print_direction_vector (outf, v, length);
325 /* Print a vector of distance vectors. */
328 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
334 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
335 print_lambda_vector (outf, v, length);
341 debug_data_dependence_relation (struct data_dependence_relation *ddr)
343 dump_data_dependence_relation (stderr, ddr);
346 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
349 dump_data_dependence_relation (FILE *outf,
350 struct data_dependence_relation *ddr)
352 struct data_reference *dra, *drb;
356 fprintf (outf, "(Data Dep: \n");
357 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
358 fprintf (outf, " (don't know)\n");
360 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
361 fprintf (outf, " (no dependence)\n");
363 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
368 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
370 fprintf (outf, " access_fn_A: ");
371 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
372 fprintf (outf, " access_fn_B: ");
373 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
374 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
377 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
378 fprintf (outf, " loop nest: (");
379 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
380 fprintf (outf, "%d ", loopi->num);
381 fprintf (outf, ")\n");
383 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
385 fprintf (outf, " distance_vector: ");
386 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
390 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
392 fprintf (outf, " direction_vector: ");
393 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
398 fprintf (outf, ")\n");
401 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
404 dump_data_dependence_direction (FILE *file,
405 enum data_dependence_direction dir)
421 case dir_positive_or_negative:
422 fprintf (file, "+-");
425 case dir_positive_or_equal:
426 fprintf (file, "+=");
429 case dir_negative_or_equal:
430 fprintf (file, "-=");
442 /* Dumps the distance and direction vectors in FILE. DDRS contains
443 the dependence relations, and VECT_SIZE is the size of the
444 dependence vectors, or in other words the number of loops in the
448 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
451 struct data_dependence_relation *ddr;
454 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
455 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
457 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
459 fprintf (file, "DISTANCE_V (");
460 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
461 fprintf (file, ")\n");
464 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
466 fprintf (file, "DIRECTION_V (");
467 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
468 fprintf (file, ")\n");
472 fprintf (file, "\n\n");
475 /* Dumps the data dependence relations DDRS in FILE. */
478 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
481 struct data_dependence_relation *ddr;
483 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
484 dump_data_dependence_relation (file, ddr);
486 fprintf (file, "\n\n");
489 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
490 will be ssizetype. */
493 split_constant_offset (tree exp, tree *var, tree *off)
495 tree type = TREE_TYPE (exp), otype;
501 otype = TREE_TYPE (exp);
503 switch (TREE_CODE (exp))
506 *var = build_int_cst (type, 0);
507 *off = fold_convert (ssizetype, exp);
512 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
513 split_constant_offset (TREE_OPERAND (exp, 1), &var1, &off1);
514 *var = fold_convert (type, fold_build2 (TREE_CODE (exp), otype,
516 *off = size_binop (TREE_CODE (exp), off0, off1);
520 off1 = TREE_OPERAND (exp, 1);
521 if (TREE_CODE (off1) != INTEGER_CST)
524 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
525 *var = fold_convert (type, fold_build2 (MULT_EXPR, otype,
527 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, off1));
532 tree op, base, poffset;
533 HOST_WIDE_INT pbitsize, pbitpos;
534 enum machine_mode pmode;
535 int punsignedp, pvolatilep;
537 op = TREE_OPERAND (exp, 0);
538 if (!handled_component_p (op))
541 base = get_inner_reference (op, &pbitsize, &pbitpos, &poffset,
542 &pmode, &punsignedp, &pvolatilep, false);
544 if (pbitpos % BITS_PER_UNIT != 0)
546 base = build_fold_addr_expr (base);
547 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
551 split_constant_offset (poffset, &poffset, &off1);
552 off0 = size_binop (PLUS_EXPR, off0, off1);
553 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base),
555 fold_convert (TREE_TYPE (base), poffset));
558 *var = fold_convert (type, base);
567 *off = ssize_int (0);
570 /* Returns the address ADDR of an object in a canonical shape (without nop
571 casts, and with type of pointer to the object). */
574 canonicalize_base_object_address (tree addr)
578 if (TREE_CODE (addr) != ADDR_EXPR)
581 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
584 /* Analyzes the behavior of the memory reference DR in the innermost loop that
588 dr_analyze_innermost (struct data_reference *dr)
590 tree stmt = DR_STMT (dr);
591 struct loop *loop = loop_containing_stmt (stmt);
592 tree ref = DR_REF (dr);
593 HOST_WIDE_INT pbitsize, pbitpos;
595 enum machine_mode pmode;
596 int punsignedp, pvolatilep;
597 affine_iv base_iv, offset_iv;
598 tree init, dinit, step;
600 if (dump_file && (dump_flags & TDF_DETAILS))
601 fprintf (dump_file, "analyze_innermost: ");
603 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
604 &pmode, &punsignedp, &pvolatilep, false);
605 gcc_assert (base != NULL_TREE);
607 if (pbitpos % BITS_PER_UNIT != 0)
609 if (dump_file && (dump_flags & TDF_DETAILS))
610 fprintf (dump_file, "failed: bit offset alignment.\n");
614 base = build_fold_addr_expr (base);
615 if (!simple_iv (loop, stmt, base, &base_iv, false))
617 if (dump_file && (dump_flags & TDF_DETAILS))
618 fprintf (dump_file, "failed: evolution of base is not affine.\n");
623 offset_iv.base = ssize_int (0);
624 offset_iv.step = ssize_int (0);
626 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
628 if (dump_file && (dump_flags & TDF_DETAILS))
629 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
633 init = ssize_int (pbitpos / BITS_PER_UNIT);
634 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
635 init = size_binop (PLUS_EXPR, init, dinit);
636 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
637 init = size_binop (PLUS_EXPR, init, dinit);
639 step = size_binop (PLUS_EXPR,
640 fold_convert (ssizetype, base_iv.step),
641 fold_convert (ssizetype, offset_iv.step));
643 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
645 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
649 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
651 if (dump_file && (dump_flags & TDF_DETAILS))
652 fprintf (dump_file, "success.\n");
655 /* Determines the base object and the list of indices of memory reference
656 DR, analysed in loop nest NEST. */
659 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
661 tree stmt = DR_STMT (dr);
662 struct loop *loop = loop_containing_stmt (stmt);
663 VEC (tree, heap) *access_fns = NULL;
664 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
665 tree base, off, access_fn;
667 while (handled_component_p (aref))
669 if (TREE_CODE (aref) == ARRAY_REF)
671 op = TREE_OPERAND (aref, 1);
672 access_fn = analyze_scalar_evolution (loop, op);
673 access_fn = resolve_mixers (nest, access_fn);
674 VEC_safe_push (tree, heap, access_fns, access_fn);
676 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
679 aref = TREE_OPERAND (aref, 0);
682 if (INDIRECT_REF_P (aref))
684 op = TREE_OPERAND (aref, 0);
685 access_fn = analyze_scalar_evolution (loop, op);
686 access_fn = resolve_mixers (nest, access_fn);
687 base = initial_condition (access_fn);
688 split_constant_offset (base, &base, &off);
689 access_fn = chrec_replace_initial_condition (access_fn,
690 fold_convert (TREE_TYPE (base), off));
692 TREE_OPERAND (aref, 0) = base;
693 VEC_safe_push (tree, heap, access_fns, access_fn);
696 DR_BASE_OBJECT (dr) = ref;
697 DR_ACCESS_FNS (dr) = access_fns;
700 /* Extracts the alias analysis information from the memory reference DR. */
703 dr_analyze_alias (struct data_reference *dr)
705 tree stmt = DR_STMT (dr);
706 tree ref = DR_REF (dr);
707 tree base = get_base_address (ref), addr, smt = NULL_TREE;
714 else if (INDIRECT_REF_P (base))
716 addr = TREE_OPERAND (base, 0);
717 if (TREE_CODE (addr) == SSA_NAME)
719 smt = symbol_mem_tag (SSA_NAME_VAR (addr));
720 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
724 DR_SYMBOL_TAG (dr) = smt;
725 if (var_can_have_subvars (smt))
726 DR_SUBVARS (dr) = get_subvars_for_var (smt);
728 vops = BITMAP_ALLOC (NULL);
729 FOR_EACH_SSA_TREE_OPERAND (op, stmt, it, SSA_OP_VIRTUAL_USES)
731 bitmap_set_bit (vops, DECL_UID (SSA_NAME_VAR (op)));
737 /* Returns true if the address of DR is invariant. */
740 dr_address_invariant_p (struct data_reference *dr)
745 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
746 if (tree_contains_chrecs (idx, NULL))
752 /* Frees data reference DR. */
755 free_data_ref (data_reference_p dr)
757 BITMAP_FREE (DR_VOPS (dr));
758 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
762 /* Analyzes memory reference MEMREF accessed in STMT. The reference
763 is read if IS_READ is true, write otherwise. Returns the
764 data_reference description of MEMREF. NEST is the outermost loop of the
765 loop nest in that the reference should be analysed. */
767 static struct data_reference *
768 create_data_ref (struct loop *nest, tree memref, tree stmt, bool is_read)
770 struct data_reference *dr;
772 if (dump_file && (dump_flags & TDF_DETAILS))
774 fprintf (dump_file, "Creating dr for ");
775 print_generic_expr (dump_file, memref, TDF_SLIM);
776 fprintf (dump_file, "\n");
779 dr = XCNEW (struct data_reference);
781 DR_REF (dr) = memref;
782 DR_IS_READ (dr) = is_read;
784 dr_analyze_innermost (dr);
785 dr_analyze_indices (dr, nest);
786 dr_analyze_alias (dr);
788 if (dump_file && (dump_flags & TDF_DETAILS))
790 fprintf (dump_file, "\tbase_address: ");
791 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
792 fprintf (dump_file, "\n\toffset from base address: ");
793 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
794 fprintf (dump_file, "\n\tconstant offset from base address: ");
795 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
796 fprintf (dump_file, "\n\tstep: ");
797 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
798 fprintf (dump_file, "\n\taligned to: ");
799 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
800 fprintf (dump_file, "\n\tbase_object: ");
801 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
802 fprintf (dump_file, "\n\tsymbol tag: ");
803 print_generic_expr (dump_file, DR_SYMBOL_TAG (dr), TDF_SLIM);
804 fprintf (dump_file, "\n");
807 /* FIXME -- data dependence analysis does not work correctly for objects with
808 invariant addresses. Let us fail here until the problem is fixed. */
809 if (dr_address_invariant_p (dr))
813 if (dump_file && (dump_flags & TDF_DETAILS))
814 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
820 /* Returns true if FNA == FNB. */
823 affine_function_equal_p (affine_fn fna, affine_fn fnb)
825 unsigned i, n = VEC_length (tree, fna);
827 if (n != VEC_length (tree, fnb))
830 for (i = 0; i < n; i++)
831 if (!operand_equal_p (VEC_index (tree, fna, i),
832 VEC_index (tree, fnb, i), 0))
838 /* If all the functions in CF are the same, returns one of them,
839 otherwise returns NULL. */
842 common_affine_function (conflict_function *cf)
847 if (!CF_NONTRIVIAL_P (cf))
852 for (i = 1; i < cf->n; i++)
853 if (!affine_function_equal_p (comm, cf->fns[i]))
859 /* Returns the base of the affine function FN. */
862 affine_function_base (affine_fn fn)
864 return VEC_index (tree, fn, 0);
867 /* Returns true if FN is a constant. */
870 affine_function_constant_p (affine_fn fn)
875 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
876 if (!integer_zerop (coef))
882 /* Returns true if FN is the zero constant function. */
885 affine_function_zero_p (affine_fn fn)
887 return (integer_zerop (affine_function_base (fn))
888 && affine_function_constant_p (fn));
891 /* Applies operation OP on affine functions FNA and FNB, and returns the
895 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
901 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
903 n = VEC_length (tree, fna);
904 m = VEC_length (tree, fnb);
908 n = VEC_length (tree, fnb);
909 m = VEC_length (tree, fna);
912 ret = VEC_alloc (tree, heap, m);
913 for (i = 0; i < n; i++)
914 VEC_quick_push (tree, ret,
915 fold_build2 (op, integer_type_node,
916 VEC_index (tree, fna, i),
917 VEC_index (tree, fnb, i)));
919 for (; VEC_iterate (tree, fna, i, coef); i++)
920 VEC_quick_push (tree, ret,
921 fold_build2 (op, integer_type_node,
922 coef, integer_zero_node));
923 for (; VEC_iterate (tree, fnb, i, coef); i++)
924 VEC_quick_push (tree, ret,
925 fold_build2 (op, integer_type_node,
926 integer_zero_node, coef));
931 /* Returns the sum of affine functions FNA and FNB. */
934 affine_fn_plus (affine_fn fna, affine_fn fnb)
936 return affine_fn_op (PLUS_EXPR, fna, fnb);
939 /* Returns the difference of affine functions FNA and FNB. */
942 affine_fn_minus (affine_fn fna, affine_fn fnb)
944 return affine_fn_op (MINUS_EXPR, fna, fnb);
947 /* Frees affine function FN. */
950 affine_fn_free (affine_fn fn)
952 VEC_free (tree, heap, fn);
955 /* Determine for each subscript in the data dependence relation DDR
959 compute_subscript_distance (struct data_dependence_relation *ddr)
961 conflict_function *cf_a, *cf_b;
962 affine_fn fn_a, fn_b, diff;
964 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
968 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
970 struct subscript *subscript;
972 subscript = DDR_SUBSCRIPT (ddr, i);
973 cf_a = SUB_CONFLICTS_IN_A (subscript);
974 cf_b = SUB_CONFLICTS_IN_B (subscript);
976 fn_a = common_affine_function (cf_a);
977 fn_b = common_affine_function (cf_b);
980 SUB_DISTANCE (subscript) = chrec_dont_know;
983 diff = affine_fn_minus (fn_a, fn_b);
985 if (affine_function_constant_p (diff))
986 SUB_DISTANCE (subscript) = affine_function_base (diff);
988 SUB_DISTANCE (subscript) = chrec_dont_know;
990 affine_fn_free (diff);
995 /* Returns the conflict function for "unknown". */
997 static conflict_function *
998 conflict_fn_not_known (void)
1000 conflict_function *fn = XCNEW (conflict_function);
1006 /* Returns the conflict function for "independent". */
1008 static conflict_function *
1009 conflict_fn_no_dependence (void)
1011 conflict_function *fn = XCNEW (conflict_function);
1012 fn->n = NO_DEPENDENCE;
1017 /* Returns true if the address of OBJ is invariant in LOOP. */
1020 object_address_invariant_in_loop_p (struct loop *loop, tree obj)
1022 while (handled_component_p (obj))
1024 if (TREE_CODE (obj) == ARRAY_REF)
1026 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1027 need to check the stride and the lower bound of the reference. */
1028 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1030 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1034 else if (TREE_CODE (obj) == COMPONENT_REF)
1036 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1040 obj = TREE_OPERAND (obj, 0);
1043 if (!INDIRECT_REF_P (obj))
1046 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1050 /* Returns true if A and B are accesses to different objects, or to different
1051 fields of the same object. */
1054 disjoint_objects_p (tree a, tree b)
1056 tree base_a, base_b;
1057 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1060 base_a = get_base_address (a);
1061 base_b = get_base_address (b);
1065 && base_a != base_b)
1068 if (!operand_equal_p (base_a, base_b, 0))
1071 /* Compare the component references of A and B. We must start from the inner
1072 ones, so record them to the vector first. */
1073 while (handled_component_p (a))
1075 VEC_safe_push (tree, heap, comp_a, a);
1076 a = TREE_OPERAND (a, 0);
1078 while (handled_component_p (b))
1080 VEC_safe_push (tree, heap, comp_b, b);
1081 b = TREE_OPERAND (b, 0);
1087 if (VEC_length (tree, comp_a) == 0
1088 || VEC_length (tree, comp_b) == 0)
1091 a = VEC_pop (tree, comp_a);
1092 b = VEC_pop (tree, comp_b);
1094 /* Real and imaginary part of a variable do not alias. */
1095 if ((TREE_CODE (a) == REALPART_EXPR
1096 && TREE_CODE (b) == IMAGPART_EXPR)
1097 || (TREE_CODE (a) == IMAGPART_EXPR
1098 && TREE_CODE (b) == REALPART_EXPR))
1104 if (TREE_CODE (a) != TREE_CODE (b))
1107 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1108 DR_BASE_OBJECT are always zero. */
1109 if (TREE_CODE (a) == ARRAY_REF)
1111 else if (TREE_CODE (a) == COMPONENT_REF)
1113 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1116 /* Different fields of unions may overlap. */
1117 base_a = TREE_OPERAND (a, 0);
1118 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1121 /* Different fields of structures cannot. */
1129 VEC_free (tree, heap, comp_a);
1130 VEC_free (tree, heap, comp_b);
1135 /* Returns false if we can prove that data references A and B do not alias,
1139 dr_may_alias_p (struct data_reference *a, struct data_reference *b)
1141 tree addr_a = DR_BASE_ADDRESS (a);
1142 tree addr_b = DR_BASE_ADDRESS (b);
1143 tree type_a, type_b;
1144 tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1146 /* If the sets of virtual operands are disjoint, the memory references do not
1148 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
1151 /* If the accessed objects are disjoint, the memory references do not
1153 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1156 if (!addr_a || !addr_b)
1159 /* If the references are based on different static objects, they cannot alias
1160 (PTA should be able to disambiguate such accesses, but often it fails to,
1161 since currently we cannot distinguish between pointer and offset in pointer
1163 if (TREE_CODE (addr_a) == ADDR_EXPR
1164 && TREE_CODE (addr_b) == ADDR_EXPR)
1165 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1167 /* An instruction writing through a restricted pointer is "independent" of any
1168 instruction reading or writing through a different restricted pointer,
1169 in the same block/scope. */
1171 type_a = TREE_TYPE (addr_a);
1172 type_b = TREE_TYPE (addr_b);
1173 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1175 if (TREE_CODE (addr_a) == SSA_NAME)
1176 decl_a = SSA_NAME_VAR (addr_a);
1177 if (TREE_CODE (addr_b) == SSA_NAME)
1178 decl_b = SSA_NAME_VAR (addr_b);
1180 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1181 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1182 && decl_a && TREE_CODE (decl_a) == PARM_DECL
1183 && decl_b && TREE_CODE (decl_b) == PARM_DECL
1184 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1185 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1191 /* Initialize a data dependence relation between data accesses A and
1192 B. NB_LOOPS is the number of loops surrounding the references: the
1193 size of the classic distance/direction vectors. */
1195 static struct data_dependence_relation *
1196 initialize_data_dependence_relation (struct data_reference *a,
1197 struct data_reference *b,
1198 VEC (loop_p, heap) *loop_nest)
1200 struct data_dependence_relation *res;
1203 res = XNEW (struct data_dependence_relation);
1206 DDR_LOOP_NEST (res) = NULL;
1208 if (a == NULL || b == NULL)
1210 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1214 /* If the data references do not alias, then they are independent. */
1215 if (!dr_may_alias_p (a, b))
1217 DDR_ARE_DEPENDENT (res) = chrec_known;
1221 /* If the references do not access the same object, we do not know
1222 whether they alias or not. */
1223 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1225 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1229 /* If the base of the object is not invariant in the loop nest, we cannot
1230 analyse it. TODO -- in fact, it would suffice to record that there may
1231 be arbitrary depencences in the loops where the base object varies. */
1232 if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1233 DR_BASE_OBJECT (a)))
1235 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1239 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1241 DDR_AFFINE_P (res) = true;
1242 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1243 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1244 DDR_LOOP_NEST (res) = loop_nest;
1245 DDR_INNER_LOOP (res) = 0;
1246 DDR_DIR_VECTS (res) = NULL;
1247 DDR_DIST_VECTS (res) = NULL;
1249 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1251 struct subscript *subscript;
1253 subscript = XNEW (struct subscript);
1254 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1255 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1256 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1257 SUB_DISTANCE (subscript) = chrec_dont_know;
1258 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1264 /* Frees memory used by the conflict function F. */
1267 free_conflict_function (conflict_function *f)
1271 if (CF_NONTRIVIAL_P (f))
1273 for (i = 0; i < f->n; i++)
1274 affine_fn_free (f->fns[i]);
1279 /* Frees memory used by SUBSCRIPTS. */
1282 free_subscripts (VEC (subscript_p, heap) *subscripts)
1287 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1289 free_conflict_function (s->conflicting_iterations_in_a);
1290 free_conflict_function (s->conflicting_iterations_in_b);
1292 VEC_free (subscript_p, heap, subscripts);
1295 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1299 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1302 if (dump_file && (dump_flags & TDF_DETAILS))
1304 fprintf (dump_file, "(dependence classified: ");
1305 print_generic_expr (dump_file, chrec, 0);
1306 fprintf (dump_file, ")\n");
1309 DDR_ARE_DEPENDENT (ddr) = chrec;
1310 free_subscripts (DDR_SUBSCRIPTS (ddr));
1313 /* The dependence relation DDR cannot be represented by a distance
1317 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1319 if (dump_file && (dump_flags & TDF_DETAILS))
1320 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1322 DDR_AFFINE_P (ddr) = false;
1327 /* This section contains the classic Banerjee tests. */
1329 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1330 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1333 ziv_subscript_p (tree chrec_a,
1336 return (evolution_function_is_constant_p (chrec_a)
1337 && evolution_function_is_constant_p (chrec_b));
1340 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1341 variable, i.e., if the SIV (Single Index Variable) test is true. */
1344 siv_subscript_p (tree chrec_a,
1347 if ((evolution_function_is_constant_p (chrec_a)
1348 && evolution_function_is_univariate_p (chrec_b))
1349 || (evolution_function_is_constant_p (chrec_b)
1350 && evolution_function_is_univariate_p (chrec_a)))
1353 if (evolution_function_is_univariate_p (chrec_a)
1354 && evolution_function_is_univariate_p (chrec_b))
1356 switch (TREE_CODE (chrec_a))
1358 case POLYNOMIAL_CHREC:
1359 switch (TREE_CODE (chrec_b))
1361 case POLYNOMIAL_CHREC:
1362 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1377 /* Creates a conflict function with N dimensions. The affine functions
1378 in each dimension follow. */
1380 static conflict_function *
1381 conflict_fn (unsigned n, ...)
1384 conflict_function *ret = XCNEW (conflict_function);
1387 gcc_assert (0 < n && n <= MAX_DIM);
1391 for (i = 0; i < n; i++)
1392 ret->fns[i] = va_arg (ap, affine_fn);
1398 /* Returns constant affine function with value CST. */
1401 affine_fn_cst (tree cst)
1403 affine_fn fn = VEC_alloc (tree, heap, 1);
1404 VEC_quick_push (tree, fn, cst);
1408 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1411 affine_fn_univar (tree cst, unsigned dim, tree coef)
1413 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1416 gcc_assert (dim > 0);
1417 VEC_quick_push (tree, fn, cst);
1418 for (i = 1; i < dim; i++)
1419 VEC_quick_push (tree, fn, integer_zero_node);
1420 VEC_quick_push (tree, fn, coef);
1424 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1425 *OVERLAPS_B are initialized to the functions that describe the
1426 relation between the elements accessed twice by CHREC_A and
1427 CHREC_B. For k >= 0, the following property is verified:
1429 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1432 analyze_ziv_subscript (tree chrec_a,
1434 conflict_function **overlaps_a,
1435 conflict_function **overlaps_b,
1436 tree *last_conflicts)
1439 dependence_stats.num_ziv++;
1441 if (dump_file && (dump_flags & TDF_DETAILS))
1442 fprintf (dump_file, "(analyze_ziv_subscript \n");
1444 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1445 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1446 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
1448 switch (TREE_CODE (difference))
1451 if (integer_zerop (difference))
1453 /* The difference is equal to zero: the accessed index
1454 overlaps for each iteration in the loop. */
1455 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1456 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1457 *last_conflicts = chrec_dont_know;
1458 dependence_stats.num_ziv_dependent++;
1462 /* The accesses do not overlap. */
1463 *overlaps_a = conflict_fn_no_dependence ();
1464 *overlaps_b = conflict_fn_no_dependence ();
1465 *last_conflicts = integer_zero_node;
1466 dependence_stats.num_ziv_independent++;
1471 /* We're not sure whether the indexes overlap. For the moment,
1472 conservatively answer "don't know". */
1473 if (dump_file && (dump_flags & TDF_DETAILS))
1474 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1476 *overlaps_a = conflict_fn_not_known ();
1477 *overlaps_b = conflict_fn_not_known ();
1478 *last_conflicts = chrec_dont_know;
1479 dependence_stats.num_ziv_unimplemented++;
1483 if (dump_file && (dump_flags & TDF_DETAILS))
1484 fprintf (dump_file, ")\n");
1487 /* Sets NIT to the estimated number of executions of the statements in
1488 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1489 large as the number of iterations. If we have no reliable estimate,
1490 the function returns false, otherwise returns true. */
1493 estimated_loop_iterations (struct loop *loop, bool conservative,
1496 estimate_numbers_of_iterations_loop (loop);
1499 if (!loop->any_upper_bound)
1502 *nit = loop->nb_iterations_upper_bound;
1506 if (!loop->any_estimate)
1509 *nit = loop->nb_iterations_estimate;
1515 /* Similar to estimated_loop_iterations, but returns the estimate only
1516 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1517 on the number of iterations of LOOP could not be derived, returns -1. */
1520 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1523 HOST_WIDE_INT hwi_nit;
1525 if (!estimated_loop_iterations (loop, conservative, &nit))
1528 if (!double_int_fits_in_shwi_p (nit))
1530 hwi_nit = double_int_to_shwi (nit);
1532 return hwi_nit < 0 ? -1 : hwi_nit;
1535 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1536 and only if it fits to the int type. If this is not the case, or the
1537 estimate on the number of iterations of LOOP could not be derived, returns
1541 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1546 if (!estimated_loop_iterations (loop, conservative, &nit))
1547 return chrec_dont_know;
1549 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1550 if (!double_int_fits_to_tree_p (type, nit))
1551 return chrec_dont_know;
1553 return double_int_to_tree (type, nit);
1556 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1557 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1558 *OVERLAPS_B are initialized to the functions that describe the
1559 relation between the elements accessed twice by CHREC_A and
1560 CHREC_B. For k >= 0, the following property is verified:
1562 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1565 analyze_siv_subscript_cst_affine (tree chrec_a,
1567 conflict_function **overlaps_a,
1568 conflict_function **overlaps_b,
1569 tree *last_conflicts)
1571 bool value0, value1, value2;
1572 tree difference, tmp;
1574 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1575 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1576 difference = chrec_fold_minus
1577 (integer_type_node, initial_condition (chrec_b), chrec_a);
1579 if (!chrec_is_positive (initial_condition (difference), &value0))
1581 if (dump_file && (dump_flags & TDF_DETAILS))
1582 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1584 dependence_stats.num_siv_unimplemented++;
1585 *overlaps_a = conflict_fn_not_known ();
1586 *overlaps_b = conflict_fn_not_known ();
1587 *last_conflicts = chrec_dont_know;
1592 if (value0 == false)
1594 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1596 if (dump_file && (dump_flags & TDF_DETAILS))
1597 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1599 *overlaps_a = conflict_fn_not_known ();
1600 *overlaps_b = conflict_fn_not_known ();
1601 *last_conflicts = chrec_dont_know;
1602 dependence_stats.num_siv_unimplemented++;
1611 chrec_b = {10, +, 1}
1614 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1616 HOST_WIDE_INT numiter;
1617 struct loop *loop = get_chrec_loop (chrec_b);
1619 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1620 tmp = fold_build2 (EXACT_DIV_EXPR, integer_type_node,
1621 fold_build1 (ABS_EXPR,
1624 CHREC_RIGHT (chrec_b));
1625 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1626 *last_conflicts = integer_one_node;
1629 /* Perform weak-zero siv test to see if overlap is
1630 outside the loop bounds. */
1631 numiter = estimated_loop_iterations_int (loop, true);
1634 && compare_tree_int (tmp, numiter) > 0)
1636 free_conflict_function (*overlaps_a);
1637 free_conflict_function (*overlaps_b);
1638 *overlaps_a = conflict_fn_no_dependence ();
1639 *overlaps_b = conflict_fn_no_dependence ();
1640 *last_conflicts = integer_zero_node;
1641 dependence_stats.num_siv_independent++;
1644 dependence_stats.num_siv_dependent++;
1648 /* When the step does not divide the difference, there are
1652 *overlaps_a = conflict_fn_no_dependence ();
1653 *overlaps_b = conflict_fn_no_dependence ();
1654 *last_conflicts = integer_zero_node;
1655 dependence_stats.num_siv_independent++;
1664 chrec_b = {10, +, -1}
1666 In this case, chrec_a will not overlap with chrec_b. */
1667 *overlaps_a = conflict_fn_no_dependence ();
1668 *overlaps_b = conflict_fn_no_dependence ();
1669 *last_conflicts = integer_zero_node;
1670 dependence_stats.num_siv_independent++;
1677 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1679 if (dump_file && (dump_flags & TDF_DETAILS))
1680 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1682 *overlaps_a = conflict_fn_not_known ();
1683 *overlaps_b = conflict_fn_not_known ();
1684 *last_conflicts = chrec_dont_know;
1685 dependence_stats.num_siv_unimplemented++;
1690 if (value2 == false)
1694 chrec_b = {10, +, -1}
1696 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1698 HOST_WIDE_INT numiter;
1699 struct loop *loop = get_chrec_loop (chrec_b);
1701 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1702 tmp = fold_build2 (EXACT_DIV_EXPR,
1703 integer_type_node, difference,
1704 CHREC_RIGHT (chrec_b));
1705 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1706 *last_conflicts = integer_one_node;
1708 /* Perform weak-zero siv test to see if overlap is
1709 outside the loop bounds. */
1710 numiter = estimated_loop_iterations_int (loop, true);
1713 && compare_tree_int (tmp, numiter) > 0)
1715 free_conflict_function (*overlaps_a);
1716 free_conflict_function (*overlaps_b);
1717 *overlaps_a = conflict_fn_no_dependence ();
1718 *overlaps_b = conflict_fn_no_dependence ();
1719 *last_conflicts = integer_zero_node;
1720 dependence_stats.num_siv_independent++;
1723 dependence_stats.num_siv_dependent++;
1727 /* When the step does not divide the difference, there
1731 *overlaps_a = conflict_fn_no_dependence ();
1732 *overlaps_b = conflict_fn_no_dependence ();
1733 *last_conflicts = integer_zero_node;
1734 dependence_stats.num_siv_independent++;
1744 In this case, chrec_a will not overlap with chrec_b. */
1745 *overlaps_a = conflict_fn_no_dependence ();
1746 *overlaps_b = conflict_fn_no_dependence ();
1747 *last_conflicts = integer_zero_node;
1748 dependence_stats.num_siv_independent++;
1756 /* Helper recursive function for initializing the matrix A. Returns
1757 the initial value of CHREC. */
1760 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1764 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
1765 return int_cst_value (chrec);
1767 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1768 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1771 #define FLOOR_DIV(x,y) ((x) / (y))
1773 /* Solves the special case of the Diophantine equation:
1774 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1776 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1777 number of iterations that loops X and Y run. The overlaps will be
1778 constructed as evolutions in dimension DIM. */
1781 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1782 affine_fn *overlaps_a,
1783 affine_fn *overlaps_b,
1784 tree *last_conflicts, int dim)
1786 if (((step_a > 0 && step_b > 0)
1787 || (step_a < 0 && step_b < 0)))
1789 int step_overlaps_a, step_overlaps_b;
1790 int gcd_steps_a_b, last_conflict, tau2;
1792 gcd_steps_a_b = gcd (step_a, step_b);
1793 step_overlaps_a = step_b / gcd_steps_a_b;
1794 step_overlaps_b = step_a / gcd_steps_a_b;
1796 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1797 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1798 last_conflict = tau2;
1800 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1801 build_int_cst (NULL_TREE,
1803 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1804 build_int_cst (NULL_TREE,
1806 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1811 *overlaps_a = affine_fn_cst (integer_zero_node);
1812 *overlaps_b = affine_fn_cst (integer_zero_node);
1813 *last_conflicts = integer_zero_node;
1817 /* Solves the special case of a Diophantine equation where CHREC_A is
1818 an affine bivariate function, and CHREC_B is an affine univariate
1819 function. For example,
1821 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1823 has the following overlapping functions:
1825 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1826 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1827 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1829 FORNOW: This is a specialized implementation for a case occurring in
1830 a common benchmark. Implement the general algorithm. */
1833 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1834 conflict_function **overlaps_a,
1835 conflict_function **overlaps_b,
1836 tree *last_conflicts)
1838 bool xz_p, yz_p, xyz_p;
1839 int step_x, step_y, step_z;
1840 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1841 affine_fn overlaps_a_xz, overlaps_b_xz;
1842 affine_fn overlaps_a_yz, overlaps_b_yz;
1843 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1844 affine_fn ova1, ova2, ovb;
1845 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1847 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1848 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1849 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1851 niter_x = estimated_loop_iterations_int
1852 (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
1853 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), true);
1854 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), true);
1856 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1858 if (dump_file && (dump_flags & TDF_DETAILS))
1859 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1861 *overlaps_a = conflict_fn_not_known ();
1862 *overlaps_b = conflict_fn_not_known ();
1863 *last_conflicts = chrec_dont_know;
1867 niter = MIN (niter_x, niter_z);
1868 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1871 &last_conflicts_xz, 1);
1872 niter = MIN (niter_y, niter_z);
1873 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1876 &last_conflicts_yz, 2);
1877 niter = MIN (niter_x, niter_z);
1878 niter = MIN (niter_y, niter);
1879 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1882 &last_conflicts_xyz, 3);
1884 xz_p = !integer_zerop (last_conflicts_xz);
1885 yz_p = !integer_zerop (last_conflicts_yz);
1886 xyz_p = !integer_zerop (last_conflicts_xyz);
1888 if (xz_p || yz_p || xyz_p)
1890 ova1 = affine_fn_cst (integer_zero_node);
1891 ova2 = affine_fn_cst (integer_zero_node);
1892 ovb = affine_fn_cst (integer_zero_node);
1895 affine_fn t0 = ova1;
1898 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
1899 ovb = affine_fn_plus (ovb, overlaps_b_xz);
1900 affine_fn_free (t0);
1901 affine_fn_free (t2);
1902 *last_conflicts = last_conflicts_xz;
1906 affine_fn t0 = ova2;
1909 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
1910 ovb = affine_fn_plus (ovb, overlaps_b_yz);
1911 affine_fn_free (t0);
1912 affine_fn_free (t2);
1913 *last_conflicts = last_conflicts_yz;
1917 affine_fn t0 = ova1;
1918 affine_fn t2 = ova2;
1921 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
1922 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
1923 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
1924 affine_fn_free (t0);
1925 affine_fn_free (t2);
1926 affine_fn_free (t4);
1927 *last_conflicts = last_conflicts_xyz;
1929 *overlaps_a = conflict_fn (2, ova1, ova2);
1930 *overlaps_b = conflict_fn (1, ovb);
1934 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1935 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1936 *last_conflicts = integer_zero_node;
1939 affine_fn_free (overlaps_a_xz);
1940 affine_fn_free (overlaps_b_xz);
1941 affine_fn_free (overlaps_a_yz);
1942 affine_fn_free (overlaps_b_yz);
1943 affine_fn_free (overlaps_a_xyz);
1944 affine_fn_free (overlaps_b_xyz);
1947 /* Determines the overlapping elements due to accesses CHREC_A and
1948 CHREC_B, that are affine functions. This function cannot handle
1949 symbolic evolution functions, ie. when initial conditions are
1950 parameters, because it uses lambda matrices of integers. */
1953 analyze_subscript_affine_affine (tree chrec_a,
1955 conflict_function **overlaps_a,
1956 conflict_function **overlaps_b,
1957 tree *last_conflicts)
1959 unsigned nb_vars_a, nb_vars_b, dim;
1960 int init_a, init_b, gamma, gcd_alpha_beta;
1962 lambda_matrix A, U, S;
1964 if (eq_evolutions_p (chrec_a, chrec_b))
1966 /* The accessed index overlaps for each iteration in the
1968 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1969 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1970 *last_conflicts = chrec_dont_know;
1973 if (dump_file && (dump_flags & TDF_DETAILS))
1974 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
1976 /* For determining the initial intersection, we have to solve a
1977 Diophantine equation. This is the most time consuming part.
1979 For answering to the question: "Is there a dependence?" we have
1980 to prove that there exists a solution to the Diophantine
1981 equation, and that the solution is in the iteration domain,
1982 i.e. the solution is positive or zero, and that the solution
1983 happens before the upper bound loop.nb_iterations. Otherwise
1984 there is no dependence. This function outputs a description of
1985 the iterations that hold the intersections. */
1987 nb_vars_a = nb_vars_in_chrec (chrec_a);
1988 nb_vars_b = nb_vars_in_chrec (chrec_b);
1990 dim = nb_vars_a + nb_vars_b;
1991 U = lambda_matrix_new (dim, dim);
1992 A = lambda_matrix_new (dim, 1);
1993 S = lambda_matrix_new (dim, 1);
1995 init_a = initialize_matrix_A (A, chrec_a, 0, 1);
1996 init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
1997 gamma = init_b - init_a;
1999 /* Don't do all the hard work of solving the Diophantine equation
2000 when we already know the solution: for example,
2003 | gamma = 3 - 3 = 0.
2004 Then the first overlap occurs during the first iterations:
2005 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2009 if (nb_vars_a == 1 && nb_vars_b == 1)
2012 HOST_WIDE_INT niter, niter_a, niter_b;
2015 niter_a = estimated_loop_iterations_int
2016 (get_chrec_loop (chrec_a), true);
2017 niter_b = estimated_loop_iterations_int
2018 (get_chrec_loop (chrec_b), true);
2019 if (niter_a < 0 || niter_b < 0)
2021 if (dump_file && (dump_flags & TDF_DETAILS))
2022 fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
2023 *overlaps_a = conflict_fn_not_known ();
2024 *overlaps_b = conflict_fn_not_known ();
2025 *last_conflicts = chrec_dont_know;
2026 goto end_analyze_subs_aa;
2029 niter = MIN (niter_a, niter_b);
2031 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2032 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2034 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2037 *overlaps_a = conflict_fn (1, ova);
2038 *overlaps_b = conflict_fn (1, ovb);
2041 else if (nb_vars_a == 2 && nb_vars_b == 1)
2042 compute_overlap_steps_for_affine_1_2
2043 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2045 else if (nb_vars_a == 1 && nb_vars_b == 2)
2046 compute_overlap_steps_for_affine_1_2
2047 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2051 if (dump_file && (dump_flags & TDF_DETAILS))
2052 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2053 *overlaps_a = conflict_fn_not_known ();
2054 *overlaps_b = conflict_fn_not_known ();
2055 *last_conflicts = chrec_dont_know;
2057 goto end_analyze_subs_aa;
2061 lambda_matrix_right_hermite (A, dim, 1, S, U);
2066 lambda_matrix_row_negate (U, dim, 0);
2068 gcd_alpha_beta = S[0][0];
2070 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2071 but that is a quite strange case. Instead of ICEing, answer
2073 if (gcd_alpha_beta == 0)
2075 *overlaps_a = conflict_fn_not_known ();
2076 *overlaps_b = conflict_fn_not_known ();
2077 *last_conflicts = chrec_dont_know;
2078 goto end_analyze_subs_aa;
2081 /* The classic "gcd-test". */
2082 if (!int_divides_p (gcd_alpha_beta, gamma))
2084 /* The "gcd-test" has determined that there is no integer
2085 solution, i.e. there is no dependence. */
2086 *overlaps_a = conflict_fn_no_dependence ();
2087 *overlaps_b = conflict_fn_no_dependence ();
2088 *last_conflicts = integer_zero_node;
2091 /* Both access functions are univariate. This includes SIV and MIV cases. */
2092 else if (nb_vars_a == 1 && nb_vars_b == 1)
2094 /* Both functions should have the same evolution sign. */
2095 if (((A[0][0] > 0 && -A[1][0] > 0)
2096 || (A[0][0] < 0 && -A[1][0] < 0)))
2098 /* The solutions are given by:
2100 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2103 For a given integer t. Using the following variables,
2105 | i0 = u11 * gamma / gcd_alpha_beta
2106 | j0 = u12 * gamma / gcd_alpha_beta
2113 | y0 = j0 + j1 * t. */
2117 /* X0 and Y0 are the first iterations for which there is a
2118 dependence. X0, Y0 are two solutions of the Diophantine
2119 equation: chrec_a (X0) = chrec_b (Y0). */
2121 int niter, niter_a, niter_b;
2123 niter_a = estimated_loop_iterations_int
2124 (get_chrec_loop (chrec_a), true);
2125 niter_b = estimated_loop_iterations_int
2126 (get_chrec_loop (chrec_b), true);
2128 if (niter_a < 0 || niter_b < 0)
2130 if (dump_file && (dump_flags & TDF_DETAILS))
2131 fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
2132 *overlaps_a = conflict_fn_not_known ();
2133 *overlaps_b = conflict_fn_not_known ();
2134 *last_conflicts = chrec_dont_know;
2135 goto end_analyze_subs_aa;
2138 niter = MIN (niter_a, niter_b);
2140 i0 = U[0][0] * gamma / gcd_alpha_beta;
2141 j0 = U[0][1] * gamma / gcd_alpha_beta;
2145 if ((i1 == 0 && i0 < 0)
2146 || (j1 == 0 && j0 < 0))
2148 /* There is no solution.
2149 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2150 falls in here, but for the moment we don't look at the
2151 upper bound of the iteration domain. */
2152 *overlaps_a = conflict_fn_no_dependence ();
2153 *overlaps_b = conflict_fn_no_dependence ();
2154 *last_conflicts = integer_zero_node;
2161 tau1 = CEIL (-i0, i1);
2162 tau2 = FLOOR_DIV (niter - i0, i1);
2166 int last_conflict, min_multiple;
2167 tau1 = MAX (tau1, CEIL (-j0, j1));
2168 tau2 = MIN (tau2, FLOOR_DIV (niter - j0, j1));
2170 x0 = i1 * tau1 + i0;
2171 y0 = j1 * tau1 + j0;
2173 /* At this point (x0, y0) is one of the
2174 solutions to the Diophantine equation. The
2175 next step has to compute the smallest
2176 positive solution: the first conflicts. */
2177 min_multiple = MIN (x0 / i1, y0 / j1);
2178 x0 -= i1 * min_multiple;
2179 y0 -= j1 * min_multiple;
2181 tau1 = (x0 - i0)/i1;
2182 last_conflict = tau2 - tau1;
2184 /* If the overlap occurs outside of the bounds of the
2185 loop, there is no dependence. */
2186 if (x0 > niter || y0 > niter)
2188 *overlaps_a = conflict_fn_no_dependence ();
2189 *overlaps_b = conflict_fn_no_dependence ();
2190 *last_conflicts = integer_zero_node;
2196 affine_fn_univar (build_int_cst (NULL_TREE, x0),
2198 build_int_cst (NULL_TREE, i1)));
2201 affine_fn_univar (build_int_cst (NULL_TREE, y0),
2203 build_int_cst (NULL_TREE, j1)));
2204 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2209 /* FIXME: For the moment, the upper bound of the
2210 iteration domain for j is not checked. */
2211 if (dump_file && (dump_flags & TDF_DETAILS))
2212 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2213 *overlaps_a = conflict_fn_not_known ();
2214 *overlaps_b = conflict_fn_not_known ();
2215 *last_conflicts = chrec_dont_know;
2221 /* FIXME: For the moment, the upper bound of the
2222 iteration domain for i is not checked. */
2223 if (dump_file && (dump_flags & TDF_DETAILS))
2224 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2225 *overlaps_a = conflict_fn_not_known ();
2226 *overlaps_b = conflict_fn_not_known ();
2227 *last_conflicts = chrec_dont_know;
2233 if (dump_file && (dump_flags & TDF_DETAILS))
2234 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2235 *overlaps_a = conflict_fn_not_known ();
2236 *overlaps_b = conflict_fn_not_known ();
2237 *last_conflicts = chrec_dont_know;
2243 if (dump_file && (dump_flags & TDF_DETAILS))
2244 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2245 *overlaps_a = conflict_fn_not_known ();
2246 *overlaps_b = conflict_fn_not_known ();
2247 *last_conflicts = chrec_dont_know;
2250 end_analyze_subs_aa:
2251 if (dump_file && (dump_flags & TDF_DETAILS))
2253 fprintf (dump_file, " (overlaps_a = ");
2254 dump_conflict_function (dump_file, *overlaps_a);
2255 fprintf (dump_file, ")\n (overlaps_b = ");
2256 dump_conflict_function (dump_file, *overlaps_b);
2257 fprintf (dump_file, ")\n");
2258 fprintf (dump_file, ")\n");
2262 /* Returns true when analyze_subscript_affine_affine can be used for
2263 determining the dependence relation between chrec_a and chrec_b,
2264 that contain symbols. This function modifies chrec_a and chrec_b
2265 such that the analysis result is the same, and such that they don't
2266 contain symbols, and then can safely be passed to the analyzer.
2268 Example: The analysis of the following tuples of evolutions produce
2269 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2272 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2273 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2277 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2279 tree diff, type, left_a, left_b, right_b;
2281 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2282 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2283 /* FIXME: For the moment not handled. Might be refined later. */
2286 type = chrec_type (*chrec_a);
2287 left_a = CHREC_LEFT (*chrec_a);
2288 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL_TREE);
2289 diff = chrec_fold_minus (type, left_a, left_b);
2291 if (!evolution_function_is_constant_p (diff))
2294 if (dump_file && (dump_flags & TDF_DETAILS))
2295 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2297 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2298 diff, CHREC_RIGHT (*chrec_a));
2299 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL_TREE);
2300 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2301 build_int_cst (type, 0),
2306 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2307 *OVERLAPS_B are initialized to the functions that describe the
2308 relation between the elements accessed twice by CHREC_A and
2309 CHREC_B. For k >= 0, the following property is verified:
2311 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2314 analyze_siv_subscript (tree chrec_a,
2316 conflict_function **overlaps_a,
2317 conflict_function **overlaps_b,
2318 tree *last_conflicts)
2320 dependence_stats.num_siv++;
2322 if (dump_file && (dump_flags & TDF_DETAILS))
2323 fprintf (dump_file, "(analyze_siv_subscript \n");
2325 if (evolution_function_is_constant_p (chrec_a)
2326 && evolution_function_is_affine_p (chrec_b))
2327 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2328 overlaps_a, overlaps_b, last_conflicts);
2330 else if (evolution_function_is_affine_p (chrec_a)
2331 && evolution_function_is_constant_p (chrec_b))
2332 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2333 overlaps_b, overlaps_a, last_conflicts);
2335 else if (evolution_function_is_affine_p (chrec_a)
2336 && evolution_function_is_affine_p (chrec_b))
2338 if (!chrec_contains_symbols (chrec_a)
2339 && !chrec_contains_symbols (chrec_b))
2341 analyze_subscript_affine_affine (chrec_a, chrec_b,
2342 overlaps_a, overlaps_b,
2345 if (CF_NOT_KNOWN_P (*overlaps_a)
2346 || CF_NOT_KNOWN_P (*overlaps_b))
2347 dependence_stats.num_siv_unimplemented++;
2348 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2349 || CF_NO_DEPENDENCE_P (*overlaps_b))
2350 dependence_stats.num_siv_independent++;
2352 dependence_stats.num_siv_dependent++;
2354 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2357 analyze_subscript_affine_affine (chrec_a, chrec_b,
2358 overlaps_a, overlaps_b,
2360 /* FIXME: The number of iterations is a symbolic expression.
2361 Compute it properly. */
2362 *last_conflicts = chrec_dont_know;
2364 if (CF_NOT_KNOWN_P (*overlaps_a)
2365 || CF_NOT_KNOWN_P (*overlaps_b))
2366 dependence_stats.num_siv_unimplemented++;
2367 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2368 || CF_NO_DEPENDENCE_P (*overlaps_b))
2369 dependence_stats.num_siv_independent++;
2371 dependence_stats.num_siv_dependent++;
2374 goto siv_subscript_dontknow;
2379 siv_subscript_dontknow:;
2380 if (dump_file && (dump_flags & TDF_DETAILS))
2381 fprintf (dump_file, "siv test failed: unimplemented.\n");
2382 *overlaps_a = conflict_fn_not_known ();
2383 *overlaps_b = conflict_fn_not_known ();
2384 *last_conflicts = chrec_dont_know;
2385 dependence_stats.num_siv_unimplemented++;
2388 if (dump_file && (dump_flags & TDF_DETAILS))
2389 fprintf (dump_file, ")\n");
2392 /* Returns false if we can prove that the greatest common divisor of the steps
2393 of CHREC does not divide CST, false otherwise. */
2396 gcd_of_steps_may_divide_p (tree chrec, tree cst)
2398 HOST_WIDE_INT cd = 0, val;
2401 if (!host_integerp (cst, 0))
2403 val = tree_low_cst (cst, 0);
2405 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2407 step = CHREC_RIGHT (chrec);
2408 if (!host_integerp (step, 0))
2410 cd = gcd (cd, tree_low_cst (step, 0));
2411 chrec = CHREC_LEFT (chrec);
2414 return val % cd == 0;
2417 /* Analyze a MIV (Multiple Index Variable) subscript. *OVERLAPS_A and
2418 *OVERLAPS_B are initialized to the functions that describe the
2419 relation between the elements accessed twice by CHREC_A and
2420 CHREC_B. For k >= 0, the following property is verified:
2422 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2425 analyze_miv_subscript (tree chrec_a,
2427 conflict_function **overlaps_a,
2428 conflict_function **overlaps_b,
2429 tree *last_conflicts)
2431 /* FIXME: This is a MIV subscript, not yet handled.
2432 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2435 In the SIV test we had to solve a Diophantine equation with two
2436 variables. In the MIV case we have to solve a Diophantine
2437 equation with 2*n variables (if the subscript uses n IVs).
2440 dependence_stats.num_miv++;
2441 if (dump_file && (dump_flags & TDF_DETAILS))
2442 fprintf (dump_file, "(analyze_miv_subscript \n");
2444 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
2445 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
2446 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
2448 if (eq_evolutions_p (chrec_a, chrec_b))
2450 /* Access functions are the same: all the elements are accessed
2451 in the same order. */
2452 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2453 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2454 *last_conflicts = estimated_loop_iterations_tree
2455 (get_chrec_loop (chrec_a), true);
2456 dependence_stats.num_miv_dependent++;
2459 else if (evolution_function_is_constant_p (difference)
2460 /* For the moment, the following is verified:
2461 evolution_function_is_affine_multivariate_p (chrec_a) */
2462 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2464 /* testsuite/.../ssa-chrec-33.c
2465 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2467 The difference is 1, and all the evolution steps are multiples
2468 of 2, consequently there are no overlapping elements. */
2469 *overlaps_a = conflict_fn_no_dependence ();
2470 *overlaps_b = conflict_fn_no_dependence ();
2471 *last_conflicts = integer_zero_node;
2472 dependence_stats.num_miv_independent++;
2475 else if (evolution_function_is_affine_multivariate_p (chrec_a)
2476 && !chrec_contains_symbols (chrec_a)
2477 && evolution_function_is_affine_multivariate_p (chrec_b)
2478 && !chrec_contains_symbols (chrec_b))
2480 /* testsuite/.../ssa-chrec-35.c
2481 {0, +, 1}_2 vs. {0, +, 1}_3
2482 the overlapping elements are respectively located at iterations:
2483 {0, +, 1}_x and {0, +, 1}_x,
2484 in other words, we have the equality:
2485 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2488 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2489 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2491 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2492 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2494 analyze_subscript_affine_affine (chrec_a, chrec_b,
2495 overlaps_a, overlaps_b, last_conflicts);
2497 if (CF_NOT_KNOWN_P (*overlaps_a)
2498 || CF_NOT_KNOWN_P (*overlaps_b))
2499 dependence_stats.num_miv_unimplemented++;
2500 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2501 || CF_NO_DEPENDENCE_P (*overlaps_b))
2502 dependence_stats.num_miv_independent++;
2504 dependence_stats.num_miv_dependent++;
2509 /* When the analysis is too difficult, answer "don't know". */
2510 if (dump_file && (dump_flags & TDF_DETAILS))
2511 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2513 *overlaps_a = conflict_fn_not_known ();
2514 *overlaps_b = conflict_fn_not_known ();
2515 *last_conflicts = chrec_dont_know;
2516 dependence_stats.num_miv_unimplemented++;
2519 if (dump_file && (dump_flags & TDF_DETAILS))
2520 fprintf (dump_file, ")\n");
2523 /* Determines the iterations for which CHREC_A is equal to CHREC_B.
2524 OVERLAP_ITERATIONS_A and OVERLAP_ITERATIONS_B are initialized with
2525 two functions that describe the iterations that contain conflicting
2528 Remark: For an integer k >= 0, the following equality is true:
2530 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2534 analyze_overlapping_iterations (tree chrec_a,
2536 conflict_function **overlap_iterations_a,
2537 conflict_function **overlap_iterations_b,
2538 tree *last_conflicts)
2540 dependence_stats.num_subscript_tests++;
2542 if (dump_file && (dump_flags & TDF_DETAILS))
2544 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2545 fprintf (dump_file, " (chrec_a = ");
2546 print_generic_expr (dump_file, chrec_a, 0);
2547 fprintf (dump_file, ")\n (chrec_b = ");
2548 print_generic_expr (dump_file, chrec_b, 0);
2549 fprintf (dump_file, ")\n");
2552 if (chrec_a == NULL_TREE
2553 || chrec_b == NULL_TREE
2554 || chrec_contains_undetermined (chrec_a)
2555 || chrec_contains_undetermined (chrec_b))
2557 dependence_stats.num_subscript_undetermined++;
2559 *overlap_iterations_a = conflict_fn_not_known ();
2560 *overlap_iterations_b = conflict_fn_not_known ();
2563 /* If they are the same chrec, and are affine, they overlap
2564 on every iteration. */
2565 else if (eq_evolutions_p (chrec_a, chrec_b)
2566 && evolution_function_is_affine_multivariate_p (chrec_a))
2568 dependence_stats.num_same_subscript_function++;
2569 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2570 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2571 *last_conflicts = chrec_dont_know;
2574 /* If they aren't the same, and aren't affine, we can't do anything
2576 else if ((chrec_contains_symbols (chrec_a)
2577 || chrec_contains_symbols (chrec_b))
2578 && (!evolution_function_is_affine_multivariate_p (chrec_a)
2579 || !evolution_function_is_affine_multivariate_p (chrec_b)))
2581 dependence_stats.num_subscript_undetermined++;
2582 *overlap_iterations_a = conflict_fn_not_known ();
2583 *overlap_iterations_b = conflict_fn_not_known ();
2586 else if (ziv_subscript_p (chrec_a, chrec_b))
2587 analyze_ziv_subscript (chrec_a, chrec_b,
2588 overlap_iterations_a, overlap_iterations_b,
2591 else if (siv_subscript_p (chrec_a, chrec_b))
2592 analyze_siv_subscript (chrec_a, chrec_b,
2593 overlap_iterations_a, overlap_iterations_b,
2597 analyze_miv_subscript (chrec_a, chrec_b,
2598 overlap_iterations_a, overlap_iterations_b,
2601 if (dump_file && (dump_flags & TDF_DETAILS))
2603 fprintf (dump_file, " (overlap_iterations_a = ");
2604 dump_conflict_function (dump_file, *overlap_iterations_a);
2605 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2606 dump_conflict_function (dump_file, *overlap_iterations_b);
2607 fprintf (dump_file, ")\n");
2608 fprintf (dump_file, ")\n");
2612 /* Helper function for uniquely inserting distance vectors. */
2615 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2620 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2621 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2624 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2627 /* Helper function for uniquely inserting direction vectors. */
2630 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2635 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2636 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2639 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2642 /* Add a distance of 1 on all the loops outer than INDEX. If we
2643 haven't yet determined a distance for this outer loop, push a new
2644 distance vector composed of the previous distance, and a distance
2645 of 1 for this outer loop. Example:
2653 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2654 save (0, 1), then we have to save (1, 0). */
2657 add_outer_distances (struct data_dependence_relation *ddr,
2658 lambda_vector dist_v, int index)
2660 /* For each outer loop where init_v is not set, the accesses are
2661 in dependence of distance 1 in the loop. */
2662 while (--index >= 0)
2664 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2665 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2667 save_dist_v (ddr, save_v);
2671 /* Return false when fail to represent the data dependence as a
2672 distance vector. INIT_B is set to true when a component has been
2673 added to the distance vector DIST_V. INDEX_CARRY is then set to
2674 the index in DIST_V that carries the dependence. */
2677 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2678 struct data_reference *ddr_a,
2679 struct data_reference *ddr_b,
2680 lambda_vector dist_v, bool *init_b,
2684 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2686 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2688 tree access_fn_a, access_fn_b;
2689 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2691 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2693 non_affine_dependence_relation (ddr);
2697 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2698 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2700 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2701 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2704 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2705 DDR_LOOP_NEST (ddr));
2706 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2707 DDR_LOOP_NEST (ddr));
2709 /* The dependence is carried by the outermost loop. Example:
2716 In this case, the dependence is carried by loop_1. */
2717 index = index_a < index_b ? index_a : index_b;
2718 *index_carry = MIN (index, *index_carry);
2720 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2722 non_affine_dependence_relation (ddr);
2726 dist = int_cst_value (SUB_DISTANCE (subscript));
2728 /* This is the subscript coupling test. If we have already
2729 recorded a distance for this loop (a distance coming from
2730 another subscript), it should be the same. For example,
2731 in the following code, there is no dependence:
2738 if (init_v[index] != 0 && dist_v[index] != dist)
2740 finalize_ddr_dependent (ddr, chrec_known);
2744 dist_v[index] = dist;
2750 /* This can be for example an affine vs. constant dependence
2751 (T[i] vs. T[3]) that is not an affine dependence and is
2752 not representable as a distance vector. */
2753 non_affine_dependence_relation (ddr);
2761 /* Return true when the DDR contains two data references that have the
2762 same access functions. */
2765 same_access_functions (struct data_dependence_relation *ddr)
2769 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2770 if (!eq_evolutions_p (DR_ACCESS_FN (DDR_A (ddr), i),
2771 DR_ACCESS_FN (DDR_B (ddr), i)))
2777 /* Return true when the DDR contains only constant access functions. */
2780 constant_access_functions (struct data_dependence_relation *ddr)
2784 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2785 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2786 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2793 /* Helper function for the case where DDR_A and DDR_B are the same
2794 multivariate access function. */
2797 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2800 tree c_1 = CHREC_LEFT (c_2);
2801 tree c_0 = CHREC_LEFT (c_1);
2802 lambda_vector dist_v;
2805 /* Polynomials with more than 2 variables are not handled yet. */
2806 if (TREE_CODE (c_0) != INTEGER_CST)
2808 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2812 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2813 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2815 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2816 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2817 v1 = int_cst_value (CHREC_RIGHT (c_1));
2818 v2 = int_cst_value (CHREC_RIGHT (c_2));
2831 save_dist_v (ddr, dist_v);
2833 add_outer_distances (ddr, dist_v, x_1);
2836 /* Helper function for the case where DDR_A and DDR_B are the same
2837 access functions. */
2840 add_other_self_distances (struct data_dependence_relation *ddr)
2842 lambda_vector dist_v;
2844 int index_carry = DDR_NB_LOOPS (ddr);
2846 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2848 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2850 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2852 if (!evolution_function_is_univariate_p (access_fun))
2854 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2856 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2860 add_multivariate_self_dist (ddr, DR_ACCESS_FN (DDR_A (ddr), 0));
2864 index_carry = MIN (index_carry,
2865 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2866 DDR_LOOP_NEST (ddr)));
2870 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2871 add_outer_distances (ddr, dist_v, index_carry);
2875 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2877 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2879 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2880 save_dist_v (ddr, dist_v);
2883 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2884 is the case for example when access functions are the same and
2885 equal to a constant, as in:
2892 in which case the distance vectors are (0) and (1). */
2895 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2899 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2901 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2902 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2903 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2905 for (j = 0; j < ca->n; j++)
2906 if (affine_function_zero_p (ca->fns[j]))
2908 insert_innermost_unit_dist_vector (ddr);
2912 for (j = 0; j < cb->n; j++)
2913 if (affine_function_zero_p (cb->fns[j]))
2915 insert_innermost_unit_dist_vector (ddr);
2921 /* Compute the classic per loop distance vector. DDR is the data
2922 dependence relation to build a vector from. Return false when fail
2923 to represent the data dependence as a distance vector. */
2926 build_classic_dist_vector (struct data_dependence_relation *ddr)
2928 bool init_b = false;
2929 int index_carry = DDR_NB_LOOPS (ddr);
2930 lambda_vector dist_v;
2932 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
2935 if (same_access_functions (ddr))
2937 /* Save the 0 vector. */
2938 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2939 save_dist_v (ddr, dist_v);
2941 if (constant_access_functions (ddr))
2942 add_distance_for_zero_overlaps (ddr);
2944 if (DDR_NB_LOOPS (ddr) > 1)
2945 add_other_self_distances (ddr);
2950 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2951 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
2952 dist_v, &init_b, &index_carry))
2955 /* Save the distance vector if we initialized one. */
2958 /* Verify a basic constraint: classic distance vectors should
2959 always be lexicographically positive.
2961 Data references are collected in the order of execution of
2962 the program, thus for the following loop
2964 | for (i = 1; i < 100; i++)
2965 | for (j = 1; j < 100; j++)
2967 | t = T[j+1][i-1]; // A
2968 | T[j][i] = t + 2; // B
2971 references are collected following the direction of the wind:
2972 A then B. The data dependence tests are performed also
2973 following this order, such that we're looking at the distance
2974 separating the elements accessed by A from the elements later
2975 accessed by B. But in this example, the distance returned by
2976 test_dep (A, B) is lexicographically negative (-1, 1), that
2977 means that the access A occurs later than B with respect to
2978 the outer loop, ie. we're actually looking upwind. In this
2979 case we solve test_dep (B, A) looking downwind to the
2980 lexicographically positive solution, that returns the
2981 distance vector (1, -1). */
2982 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
2984 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2985 subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr));
2986 compute_subscript_distance (ddr);
2987 build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
2988 save_v, &init_b, &index_carry);
2989 save_dist_v (ddr, save_v);
2991 /* In this case there is a dependence forward for all the
2994 | for (k = 1; k < 100; k++)
2995 | for (i = 1; i < 100; i++)
2996 | for (j = 1; j < 100; j++)
2998 | t = T[j+1][i-1]; // A
2999 | T[j][i] = t + 2; // B
3007 if (DDR_NB_LOOPS (ddr) > 1)
3009 add_outer_distances (ddr, save_v, index_carry);
3010 add_outer_distances (ddr, dist_v, index_carry);
3015 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3016 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3017 save_dist_v (ddr, save_v);
3019 if (DDR_NB_LOOPS (ddr) > 1)
3021 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3023 subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr));
3024 compute_subscript_distance (ddr);
3025 build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3026 opposite_v, &init_b, &index_carry);
3028 add_outer_distances (ddr, dist_v, index_carry);
3029 add_outer_distances (ddr, opposite_v, index_carry);
3035 /* There is a distance of 1 on all the outer loops: Example:
3036 there is a dependence of distance 1 on loop_1 for the array A.
3042 add_outer_distances (ddr, dist_v,
3043 lambda_vector_first_nz (dist_v,
3044 DDR_NB_LOOPS (ddr), 0));
3047 if (dump_file && (dump_flags & TDF_DETAILS))
3051 fprintf (dump_file, "(build_classic_dist_vector\n");
3052 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3054 fprintf (dump_file, " dist_vector = (");
3055 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3056 DDR_NB_LOOPS (ddr));
3057 fprintf (dump_file, " )\n");
3059 fprintf (dump_file, ")\n");
3065 /* Return the direction for a given distance.
3066 FIXME: Computing dir this way is suboptimal, since dir can catch
3067 cases that dist is unable to represent. */
3069 static inline enum data_dependence_direction
3070 dir_from_dist (int dist)
3073 return dir_positive;
3075 return dir_negative;
3080 /* Compute the classic per loop direction vector. DDR is the data
3081 dependence relation to build a vector from. */
3084 build_classic_dir_vector (struct data_dependence_relation *ddr)
3087 lambda_vector dist_v;
3089 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3091 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3093 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3094 dir_v[j] = dir_from_dist (dist_v[j]);
3096 save_dir_v (ddr, dir_v);
3100 /* Helper function. Returns true when there is a dependence between
3101 data references DRA and DRB. */
3104 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3105 struct data_reference *dra,
3106 struct data_reference *drb)
3109 tree last_conflicts;
3110 struct subscript *subscript;
3112 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3115 conflict_function *overlaps_a, *overlaps_b;
3117 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3118 DR_ACCESS_FN (drb, i),
3119 &overlaps_a, &overlaps_b,
3122 if (CF_NOT_KNOWN_P (overlaps_a)
3123 || CF_NOT_KNOWN_P (overlaps_b))
3125 finalize_ddr_dependent (ddr, chrec_dont_know);
3126 dependence_stats.num_dependence_undetermined++;
3127 free_conflict_function (overlaps_a);
3128 free_conflict_function (overlaps_b);
3132 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3133 || CF_NO_DEPENDENCE_P (overlaps_b))
3135 finalize_ddr_dependent (ddr, chrec_known);
3136 dependence_stats.num_dependence_independent++;
3137 free_conflict_function (overlaps_a);
3138 free_conflict_function (overlaps_b);
3144 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3145 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3146 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3153 /* Computes the conflicting iterations, and initialize DDR. */
3156 subscript_dependence_tester (struct data_dependence_relation *ddr)
3159 if (dump_file && (dump_flags & TDF_DETAILS))
3160 fprintf (dump_file, "(subscript_dependence_tester \n");
3162 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr)))
3163 dependence_stats.num_dependence_dependent++;
3165 compute_subscript_distance (ddr);
3166 if (build_classic_dist_vector (ddr))
3167 build_classic_dir_vector (ddr);
3169 if (dump_file && (dump_flags & TDF_DETAILS))
3170 fprintf (dump_file, ")\n");
3173 /* Returns true when all the access functions of A are affine or
3177 access_functions_are_affine_or_constant_p (struct data_reference *a)
3180 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3183 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3184 if (!evolution_function_is_constant_p (t)
3185 && !evolution_function_is_affine_multivariate_p (t))
3191 /* Initializes an equation for an OMEGA problem using the information
3192 contained in the ACCESS_FUN. Returns true when the operation
3195 PB is the omega constraint system.
3196 EQ is the number of the equation to be initialized.
3197 OFFSET is used for shifting the variables names in the constraints:
3198 a constrain is composed of 2 * the number of variables surrounding
3199 dependence accesses. OFFSET is set either to 0 for the first n variables,
3200 then it is set to n.
3201 ACCESS_FUN is expected to be an affine chrec. */
3204 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3205 unsigned int offset, tree access_fun,
3206 struct data_dependence_relation *ddr)
3208 switch (TREE_CODE (access_fun))
3210 case POLYNOMIAL_CHREC:
3212 tree left = CHREC_LEFT (access_fun);
3213 tree right = CHREC_RIGHT (access_fun);
3214 int var = CHREC_VARIABLE (access_fun);
3217 if (TREE_CODE (right) != INTEGER_CST)
3220 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3221 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3223 /* Compute the innermost loop index. */
3224 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3227 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3228 += int_cst_value (right);
3230 switch (TREE_CODE (left))
3232 case POLYNOMIAL_CHREC:
3233 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3236 pb->eqs[eq].coef[0] += int_cst_value (left);
3245 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3253 /* As explained in the comments preceding init_omega_for_ddr, we have
3254 to set up a system for each loop level, setting outer loops
3255 variation to zero, and current loop variation to positive or zero.
3256 Save each lexico positive distance vector. */
3259 omega_extract_distance_vectors (omega_pb pb,
3260 struct data_dependence_relation *ddr)
3264 struct loop *loopi, *loopj;
3265 enum omega_result res;
3267 /* Set a new problem for each loop in the nest. The basis is the
3268 problem that we have initialized until now. On top of this we
3269 add new constraints. */
3270 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3271 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3274 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3275 DDR_NB_LOOPS (ddr));
3277 omega_copy_problem (copy, pb);
3279 /* For all the outer loops "loop_j", add "dj = 0". */
3281 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3283 eq = omega_add_zero_eq (copy, omega_black);
3284 copy->eqs[eq].coef[j + 1] = 1;
3287 /* For "loop_i", add "0 <= di". */
3288 geq = omega_add_zero_geq (copy, omega_black);
3289 copy->geqs[geq].coef[i + 1] = 1;
3291 /* Reduce the constraint system, and test that the current
3292 problem is feasible. */
3293 res = omega_simplify_problem (copy);
3294 if (res == omega_false
3295 || res == omega_unknown
3296 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3299 for (eq = 0; eq < copy->num_subs; eq++)
3300 if (copy->subs[eq].key == (int) i + 1)
3302 dist = copy->subs[eq].coef[0];
3308 /* Reinitialize problem... */
3309 omega_copy_problem (copy, pb);
3311 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3313 eq = omega_add_zero_eq (copy, omega_black);
3314 copy->eqs[eq].coef[j + 1] = 1;
3317 /* ..., but this time "di = 1". */
3318 eq = omega_add_zero_eq (copy, omega_black);
3319 copy->eqs[eq].coef[i + 1] = 1;
3320 copy->eqs[eq].coef[0] = -1;
3322 res = omega_simplify_problem (copy);
3323 if (res == omega_false
3324 || res == omega_unknown
3325 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3328 for (eq = 0; eq < copy->num_subs; eq++)
3329 if (copy->subs[eq].key == (int) i + 1)
3331 dist = copy->subs[eq].coef[0];
3337 /* Save the lexicographically positive distance vector. */
3340 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3341 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3345 for (eq = 0; eq < copy->num_subs; eq++)
3346 if (copy->subs[eq].key > 0)
3348 dist = copy->subs[eq].coef[0];
3349 dist_v[copy->subs[eq].key - 1] = dist;
3352 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3353 dir_v[j] = dir_from_dist (dist_v[j]);
3355 save_dist_v (ddr, dist_v);
3356 save_dir_v (ddr, dir_v);
3360 omega_free_problem (copy);
3364 /* This is called for each subscript of a tuple of data references:
3365 insert an equality for representing the conflicts. */
3368 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3369 struct data_dependence_relation *ddr,
3370 omega_pb pb, bool *maybe_dependent)
3373 tree fun_a = chrec_convert (integer_type_node, access_fun_a, NULL_TREE);
3374 tree fun_b = chrec_convert (integer_type_node, access_fun_b, NULL_TREE);
3375 tree difference = chrec_fold_minus (integer_type_node, fun_a, fun_b);
3377 /* When the fun_a - fun_b is not constant, the dependence is not
3378 captured by the classic distance vector representation. */
3379 if (TREE_CODE (difference) != INTEGER_CST)
3383 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3385 /* There is no dependence. */
3386 *maybe_dependent = false;
3390 fun_b = chrec_fold_multiply (integer_type_node, fun_b,
3391 integer_minus_one_node);
3393 eq = omega_add_zero_eq (pb, omega_black);
3394 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3395 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3396 /* There is probably a dependence, but the system of
3397 constraints cannot be built: answer "don't know". */
3401 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3402 && !int_divides_p (lambda_vector_gcd
3403 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3404 2 * DDR_NB_LOOPS (ddr)),
3405 pb->eqs[eq].coef[0]))
3407 /* There is no dependence. */
3408 *maybe_dependent = false;
3415 /* Helper function, same as init_omega_for_ddr but specialized for
3416 data references A and B. */
3419 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3420 struct data_dependence_relation *ddr,
3421 omega_pb pb, bool *maybe_dependent)
3426 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3428 /* Insert an equality per subscript. */
3429 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3431 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3432 ddr, pb, maybe_dependent))
3434 else if (*maybe_dependent == false)
3436 /* There is no dependence. */
3437 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3442 /* Insert inequalities: constraints corresponding to the iteration
3443 domain, i.e. the loops surrounding the references "loop_x" and
3444 the distance variables "dx". The layout of the OMEGA
3445 representation is as follows:
3446 - coef[0] is the constant
3447 - coef[1..nb_loops] are the protected variables that will not be
3448 removed by the solver: the "dx"
3449 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3451 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3452 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3454 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, true);
3457 ineq = omega_add_zero_geq (pb, omega_black);
3458 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3460 /* 0 <= loop_x + dx */
3461 ineq = omega_add_zero_geq (pb, omega_black);
3462 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3463 pb->geqs[ineq].coef[i + 1] = 1;
3467 /* loop_x <= nb_iters */
3468 ineq = omega_add_zero_geq (pb, omega_black);
3469 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3470 pb->geqs[ineq].coef[0] = nbi;
3472 /* loop_x + dx <= nb_iters */
3473 ineq = omega_add_zero_geq (pb, omega_black);
3474 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3475 pb->geqs[ineq].coef[i + 1] = -1;
3476 pb->geqs[ineq].coef[0] = nbi;
3478 /* A step "dx" bigger than nb_iters is not feasible, so
3479 add "0 <= nb_iters + dx", */
3480 ineq = omega_add_zero_geq (pb, omega_black);
3481 pb->geqs[ineq].coef[i + 1] = 1;
3482 pb->geqs[ineq].coef[0] = nbi;
3483 /* and "dx <= nb_iters". */
3484 ineq = omega_add_zero_geq (pb, omega_black);
3485 pb->geqs[ineq].coef[i + 1] = -1;
3486 pb->geqs[ineq].coef[0] = nbi;
3490 omega_extract_distance_vectors (pb, ddr);
3495 /* Sets up the Omega dependence problem for the data dependence
3496 relation DDR. Returns false when the constraint system cannot be
3497 built, ie. when the test answers "don't know". Returns true
3498 otherwise, and when independence has been proved (using one of the
3499 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3500 set MAYBE_DEPENDENT to true.
3502 Example: for setting up the dependence system corresponding to the
3503 conflicting accesses
3508 | ... A[2*j, 2*(i + j)]
3512 the following constraints come from the iteration domain:
3519 where di, dj are the distance variables. The constraints
3520 representing the conflicting elements are:
3523 i + 1 = 2 * (i + di + j + dj)
3525 For asking that the resulting distance vector (di, dj) be
3526 lexicographically positive, we insert the constraint "di >= 0". If
3527 "di = 0" in the solution, we fix that component to zero, and we
3528 look at the inner loops: we set a new problem where all the outer
3529 loop distances are zero, and fix this inner component to be
3530 positive. When one of the components is positive, we save that
3531 distance, and set a new problem where the distance on this loop is
3532 zero, searching for other distances in the inner loops. Here is
3533 the classic example that illustrates that we have to set for each
3534 inner loop a new problem:
3542 we have to save two distances (1, 0) and (0, 1).
3544 Given two array references, refA and refB, we have to set the
3545 dependence problem twice, refA vs. refB and refB vs. refA, and we
3546 cannot do a single test, as refB might occur before refA in the
3547 inner loops, and the contrary when considering outer loops: ex.
3552 | T[{1,+,1}_2][{1,+,1}_1] // refA
3553 | T[{2,+,1}_2][{0,+,1}_1] // refB
3558 refB touches the elements in T before refA, and thus for the same
3559 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3560 but for successive loop_0 iterations, we have (1, -1, 1)
3562 The Omega solver expects the distance variables ("di" in the
3563 previous example) to come first in the constraint system (as
3564 variables to be protected, or "safe" variables), the constraint
3565 system is built using the following layout:
3567 "cst | distance vars | index vars".
3571 init_omega_for_ddr (struct data_dependence_relation *ddr,
3572 bool *maybe_dependent)
3577 *maybe_dependent = true;
3579 if (same_access_functions (ddr))
3582 lambda_vector dir_v;
3584 /* Save the 0 vector. */
3585 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3586 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3587 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3588 dir_v[j] = dir_equal;
3589 save_dir_v (ddr, dir_v);
3591 /* Save the dependences carried by outer loops. */
3592 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3593 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3595 omega_free_problem (pb);
3599 /* Omega expects the protected variables (those that have to be kept
3600 after elimination) to appear first in the constraint system.
3601 These variables are the distance variables. In the following
3602 initialization we declare NB_LOOPS safe variables, and the total
3603 number of variables for the constraint system is 2*NB_LOOPS. */
3604 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3605 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3607 omega_free_problem (pb);
3609 /* Stop computation if not decidable, or no dependence. */
3610 if (res == false || *maybe_dependent == false)
3613 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3614 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3616 omega_free_problem (pb);
3621 /* Return true when DDR contains the same information as that stored
3622 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3625 ddr_consistent_p (FILE *file,
3626 struct data_dependence_relation *ddr,
3627 VEC (lambda_vector, heap) *dist_vects,
3628 VEC (lambda_vector, heap) *dir_vects)
3632 /* If dump_file is set, output there. */
3633 if (dump_file && (dump_flags & TDF_DETAILS))
3636 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3638 lambda_vector b_dist_v;
3639 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3640 VEC_length (lambda_vector, dist_vects),
3641 DDR_NUM_DIST_VECTS (ddr));
3643 fprintf (file, "Banerjee dist vectors:\n");
3644 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3645 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3647 fprintf (file, "Omega dist vectors:\n");
3648 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3649 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3651 fprintf (file, "data dependence relation:\n");
3652 dump_data_dependence_relation (file, ddr);
3654 fprintf (file, ")\n");
3658 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3660 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3661 VEC_length (lambda_vector, dir_vects),
3662 DDR_NUM_DIR_VECTS (ddr));
3666 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3668 lambda_vector a_dist_v;
3669 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3671 /* Distance vectors are not ordered in the same way in the DDR
3672 and in the DIST_VECTS: search for a matching vector. */
3673 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3674 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3677 if (j == VEC_length (lambda_vector, dist_vects))
3679 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3680 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3681 fprintf (file, "not found in Omega dist vectors:\n");
3682 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3683 fprintf (file, "data dependence relation:\n");
3684 dump_data_dependence_relation (file, ddr);
3685 fprintf (file, ")\n");
3689 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3691 lambda_vector a_dir_v;
3692 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3694 /* Direction vectors are not ordered in the same way in the DDR
3695 and in the DIR_VECTS: search for a matching vector. */
3696 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3697 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3700 if (j == VEC_length (lambda_vector, dist_vects))
3702 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3703 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3704 fprintf (file, "not found in Omega dir vectors:\n");
3705 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3706 fprintf (file, "data dependence relation:\n");
3707 dump_data_dependence_relation (file, ddr);
3708 fprintf (file, ")\n");
3715 /* This computes the affine dependence relation between A and B.
3716 CHREC_KNOWN is used for representing the independence between two
3717 accesses, while CHREC_DONT_KNOW is used for representing the unknown
3720 Note that it is possible to stop the computation of the dependence
3721 relation the first time we detect a CHREC_KNOWN element for a given
3725 compute_affine_dependence (struct data_dependence_relation *ddr)
3727 struct data_reference *dra = DDR_A (ddr);
3728 struct data_reference *drb = DDR_B (ddr);
3730 if (dump_file && (dump_flags & TDF_DETAILS))
3732 fprintf (dump_file, "(compute_affine_dependence\n");
3733 fprintf (dump_file, " (stmt_a = \n");
3734 print_generic_expr (dump_file, DR_STMT (dra), 0);
3735 fprintf (dump_file, ")\n (stmt_b = \n");
3736 print_generic_expr (dump_file, DR_STMT (drb), 0);
3737 fprintf (dump_file, ")\n");
3740 /* Analyze only when the dependence relation is not yet known. */
3741 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3743 dependence_stats.num_dependence_tests++;
3745 if (access_functions_are_affine_or_constant_p (dra)
3746 && access_functions_are_affine_or_constant_p (drb))
3748 if (flag_check_data_deps)
3750 /* Compute the dependences using the first algorithm. */
3751 subscript_dependence_tester (ddr);
3753 if (dump_file && (dump_flags & TDF_DETAILS))
3755 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3756 dump_data_dependence_relation (dump_file, ddr);
3759 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3761 bool maybe_dependent;
3762 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3764 /* Save the result of the first DD analyzer. */
3765 dist_vects = DDR_DIST_VECTS (ddr);
3766 dir_vects = DDR_DIR_VECTS (ddr);
3768 /* Reset the information. */
3769 DDR_DIST_VECTS (ddr) = NULL;
3770 DDR_DIR_VECTS (ddr) = NULL;
3772 /* Compute the same information using Omega. */
3773 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3774 goto csys_dont_know;
3776 if (dump_file && (dump_flags & TDF_DETAILS))
3778 fprintf (dump_file, "Omega Analyzer\n");
3779 dump_data_dependence_relation (dump_file, ddr);
3782 /* Check that we get the same information. */
3783 if (maybe_dependent)
3784 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3789 subscript_dependence_tester (ddr);
3792 /* As a last case, if the dependence cannot be determined, or if
3793 the dependence is considered too difficult to determine, answer
3798 dependence_stats.num_dependence_undetermined++;
3800 if (dump_file && (dump_flags & TDF_DETAILS))
3802 fprintf (dump_file, "Data ref a:\n");
3803 dump_data_reference (dump_file, dra);
3804 fprintf (dump_file, "Data ref b:\n");
3805 dump_data_reference (dump_file, drb);
3806 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3808 finalize_ddr_dependent (ddr, chrec_dont_know);
3812 if (dump_file && (dump_flags & TDF_DETAILS))
3813 fprintf (dump_file, ")\n");
3816 /* This computes the dependence relation for the same data
3817 reference into DDR. */
3820 compute_self_dependence (struct data_dependence_relation *ddr)
3823 struct subscript *subscript;
3825 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3828 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3831 /* The accessed index overlaps for each iteration. */
3832 SUB_CONFLICTS_IN_A (subscript)
3833 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3834 SUB_CONFLICTS_IN_B (subscript)
3835 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3836 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3839 /* The distance vector is the zero vector. */
3840 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3841 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3844 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3845 the data references in DATAREFS, in the LOOP_NEST. When
3846 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3850 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3851 VEC (ddr_p, heap) **dependence_relations,
3852 VEC (loop_p, heap) *loop_nest,
3853 bool compute_self_and_rr)
3855 struct data_dependence_relation *ddr;
3856 struct data_reference *a, *b;
3859 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3860 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3861 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
3863 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3864 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3865 compute_affine_dependence (ddr);
3868 if (compute_self_and_rr)
3869 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3871 ddr = initialize_data_dependence_relation (a, a, loop_nest);
3872 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3873 compute_self_dependence (ddr);
3877 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3878 true if STMT clobbers memory, false otherwise. */
3881 get_references_in_stmt (tree stmt, VEC (data_ref_loc, heap) **references)
3883 bool clobbers_memory = false;
3885 tree *op0, *op1, call;
3889 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
3890 Calls have side-effects, except those to const or pure
3892 call = get_call_expr_in (stmt);
3894 && !(call_expr_flags (call) & (ECF_CONST | ECF_PURE)))
3895 || (TREE_CODE (stmt) == ASM_EXPR
3896 && ASM_VOLATILE_P (stmt)))
3897 clobbers_memory = true;
3899 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
3900 return clobbers_memory;
3902 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
3904 op0 = &GIMPLE_STMT_OPERAND (stmt, 0);
3905 op1 = &GIMPLE_STMT_OPERAND (stmt, 1);
3908 || REFERENCE_CLASS_P (*op1))
3910 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3912 ref->is_read = true;
3916 || REFERENCE_CLASS_P (*op0))
3918 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3920 ref->is_read = false;
3926 unsigned i, n = call_expr_nargs (call);
3928 for (i = 0; i < n; i++)
3930 op0 = &CALL_EXPR_ARG (call, i);
3933 || REFERENCE_CLASS_P (*op0))
3935 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3937 ref->is_read = true;
3942 return clobbers_memory;
3945 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
3946 reference, returns false, otherwise returns true. NEST is the outermost
3947 loop of the loop nest in that the references should be analysed. */
3950 find_data_references_in_stmt (struct loop *nest, tree stmt,
3951 VEC (data_reference_p, heap) **datarefs)
3954 VEC (data_ref_loc, heap) *references;
3957 data_reference_p dr;
3959 if (get_references_in_stmt (stmt, &references))
3961 VEC_free (data_ref_loc, heap, references);
3965 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
3967 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
3969 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
3976 VEC_free (data_ref_loc, heap, references);
3980 /* Search the data references in LOOP, and record the information into
3981 DATAREFS. Returns chrec_dont_know when failing to analyze a
3982 difficult case, returns NULL_TREE otherwise.
3984 TODO: This function should be made smarter so that it can handle address
3985 arithmetic as if they were array accesses, etc. */
3988 find_data_references_in_loop (struct loop *loop,
3989 VEC (data_reference_p, heap) **datarefs)
3991 basic_block bb, *bbs;
3993 block_stmt_iterator bsi;
3995 bbs = get_loop_body (loop);
3997 for (i = 0; i < loop->num_nodes; i++)
4001 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4003 tree stmt = bsi_stmt (bsi);
4005 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4007 struct data_reference *res;
4008 res = XCNEW (struct data_reference);
4009 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4012 return chrec_dont_know;
4021 /* Recursive helper function. */
4024 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4026 /* Inner loops of the nest should not contain siblings. Example:
4027 when there are two consecutive loops,
4038 the dependence relation cannot be captured by the distance
4043 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4045 return find_loop_nest_1 (loop->inner, loop_nest);
4049 /* Return false when the LOOP is not well nested. Otherwise return
4050 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4051 contain the loops from the outermost to the innermost, as they will
4052 appear in the classic distance vector. */
4055 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4057 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4059 return find_loop_nest_1 (loop->inner, loop_nest);
4063 /* Given a loop nest LOOP, the following vectors are returned:
4064 DATAREFS is initialized to all the array elements contained in this loop,
4065 DEPENDENCE_RELATIONS contains the relations between the data references.
4066 Compute read-read and self relations if
4067 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4070 compute_data_dependences_for_loop (struct loop *loop,
4071 bool compute_self_and_read_read_dependences,
4072 VEC (data_reference_p, heap) **datarefs,
4073 VEC (ddr_p, heap) **dependence_relations)
4075 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4077 memset (&dependence_stats, 0, sizeof (dependence_stats));
4079 /* If the loop nest is not well formed, or one of the data references
4080 is not computable, give up without spending time to compute other
4083 || !find_loop_nest (loop, &vloops)
4084 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4086 struct data_dependence_relation *ddr;
4088 /* Insert a single relation into dependence_relations:
4090 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4091 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4094 compute_all_dependences (*datarefs, dependence_relations, vloops,
4095 compute_self_and_read_read_dependences);
4097 if (dump_file && (dump_flags & TDF_STATS))
4099 fprintf (dump_file, "Dependence tester statistics:\n");
4101 fprintf (dump_file, "Number of dependence tests: %d\n",
4102 dependence_stats.num_dependence_tests);
4103 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4104 dependence_stats.num_dependence_dependent);
4105 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4106 dependence_stats.num_dependence_independent);
4107 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4108 dependence_stats.num_dependence_undetermined);
4110 fprintf (dump_file, "Number of subscript tests: %d\n",
4111 dependence_stats.num_subscript_tests);
4112 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4113 dependence_stats.num_subscript_undetermined);
4114 fprintf (dump_file, "Number of same subscript function: %d\n",
4115 dependence_stats.num_same_subscript_function);
4117 fprintf (dump_file, "Number of ziv tests: %d\n",
4118 dependence_stats.num_ziv);
4119 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4120 dependence_stats.num_ziv_dependent);
4121 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4122 dependence_stats.num_ziv_independent);
4123 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4124 dependence_stats.num_ziv_unimplemented);
4126 fprintf (dump_file, "Number of siv tests: %d\n",
4127 dependence_stats.num_siv);
4128 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4129 dependence_stats.num_siv_dependent);
4130 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4131 dependence_stats.num_siv_independent);
4132 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4133 dependence_stats.num_siv_unimplemented);
4135 fprintf (dump_file, "Number of miv tests: %d\n",
4136 dependence_stats.num_miv);
4137 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4138 dependence_stats.num_miv_dependent);
4139 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4140 dependence_stats.num_miv_independent);
4141 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4142 dependence_stats.num_miv_unimplemented);
4146 /* Entry point (for testing only). Analyze all the data references
4147 and the dependence relations in LOOP.
4149 The data references are computed first.
4151 A relation on these nodes is represented by a complete graph. Some
4152 of the relations could be of no interest, thus the relations can be
4155 In the following function we compute all the relations. This is
4156 just a first implementation that is here for:
4157 - for showing how to ask for the dependence relations,
4158 - for the debugging the whole dependence graph,
4159 - for the dejagnu testcases and maintenance.
4161 It is possible to ask only for a part of the graph, avoiding to
4162 compute the whole dependence graph. The computed dependences are
4163 stored in a knowledge base (KB) such that later queries don't
4164 recompute the same information. The implementation of this KB is
4165 transparent to the optimizer, and thus the KB can be changed with a
4166 more efficient implementation, or the KB could be disabled. */
4168 analyze_all_data_dependences (struct loop *loop)
4171 int nb_data_refs = 10;
4172 VEC (data_reference_p, heap) *datarefs =
4173 VEC_alloc (data_reference_p, heap, nb_data_refs);
4174 VEC (ddr_p, heap) *dependence_relations =
4175 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4177 /* Compute DDs on the whole function. */
4178 compute_data_dependences_for_loop (loop, false, &datarefs,
4179 &dependence_relations);
4183 dump_data_dependence_relations (dump_file, dependence_relations);
4184 fprintf (dump_file, "\n\n");
4186 if (dump_flags & TDF_DETAILS)
4187 dump_dist_dir_vectors (dump_file, dependence_relations);
4189 if (dump_flags & TDF_STATS)
4191 unsigned nb_top_relations = 0;
4192 unsigned nb_bot_relations = 0;
4193 unsigned nb_basename_differ = 0;
4194 unsigned nb_chrec_relations = 0;
4195 struct data_dependence_relation *ddr;
4197 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4199 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4202 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4204 struct data_reference *a = DDR_A (ddr);
4205 struct data_reference *b = DDR_B (ddr);
4207 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
4208 nb_basename_differ++;
4214 nb_chrec_relations++;
4217 gather_stats_on_scev_database ();
4221 free_dependence_relations (dependence_relations);
4222 free_data_refs (datarefs);
4225 /* Computes all the data dependences and check that the results of
4226 several analyzers are the same. */
4229 tree_check_data_deps (void)
4232 struct loop *loop_nest;
4234 FOR_EACH_LOOP (li, loop_nest, 0)
4235 analyze_all_data_dependences (loop_nest);
4238 /* Free the memory used by a data dependence relation DDR. */
4241 free_dependence_relation (struct data_dependence_relation *ddr)
4246 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_SUBSCRIPTS (ddr))
4247 free_subscripts (DDR_SUBSCRIPTS (ddr));
4252 /* Free the memory used by the data dependence relations from
4253 DEPENDENCE_RELATIONS. */
4256 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4259 struct data_dependence_relation *ddr;
4260 VEC (loop_p, heap) *loop_nest = NULL;
4262 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4266 if (loop_nest == NULL)
4267 loop_nest = DDR_LOOP_NEST (ddr);
4269 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4270 || DDR_LOOP_NEST (ddr) == loop_nest);
4271 free_dependence_relation (ddr);
4275 VEC_free (loop_p, heap, loop_nest);
4276 VEC_free (ddr_p, heap, dependence_relations);
4279 /* Free the memory used by the data references from DATAREFS. */
4282 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4285 struct data_reference *dr;
4287 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4289 VEC_free (data_reference_p, heap, datarefs);