1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
50 - to define an interface to access this data.
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
63 has an integer solution x = 1 and y = -1.
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
79 #include "coretypes.h"
84 /* These RTL headers are needed for basic-block.h. */
86 #include "basic-block.h"
87 #include "diagnostic.h"
88 #include "tree-flow.h"
89 #include "tree-dump.h"
92 #include "tree-chrec.h"
93 #include "tree-data-ref.h"
94 #include "tree-scalar-evolution.h"
95 #include "tree-pass.h"
96 #include "langhooks.h"
98 static struct datadep_stats
100 int num_dependence_tests;
101 int num_dependence_dependent;
102 int num_dependence_independent;
103 int num_dependence_undetermined;
105 int num_subscript_tests;
106 int num_subscript_undetermined;
107 int num_same_subscript_function;
110 int num_ziv_independent;
111 int num_ziv_dependent;
112 int num_ziv_unimplemented;
115 int num_siv_independent;
116 int num_siv_dependent;
117 int num_siv_unimplemented;
120 int num_miv_independent;
121 int num_miv_dependent;
122 int num_miv_unimplemented;
125 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
126 struct data_reference *,
127 struct data_reference *);
128 /* Returns true iff A divides B. */
131 tree_fold_divides_p (tree a, tree b)
133 gcc_assert (TREE_CODE (a) == INTEGER_CST);
134 gcc_assert (TREE_CODE (b) == INTEGER_CST);
135 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
138 /* Returns true iff A divides B. */
141 int_divides_p (int a, int b)
143 return ((b % a) == 0);
148 /* Dump into FILE all the data references from DATAREFS. */
151 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
154 struct data_reference *dr;
156 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
157 dump_data_reference (file, dr);
160 /* Dump into FILE all the dependence relations from DDRS. */
163 dump_data_dependence_relations (FILE *file,
164 VEC (ddr_p, heap) *ddrs)
167 struct data_dependence_relation *ddr;
169 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
170 dump_data_dependence_relation (file, ddr);
173 /* Dump function for a DATA_REFERENCE structure. */
176 dump_data_reference (FILE *outf,
177 struct data_reference *dr)
181 fprintf (outf, "(Data Ref: \n stmt: ");
182 print_generic_stmt (outf, DR_STMT (dr), 0);
183 fprintf (outf, " ref: ");
184 print_generic_stmt (outf, DR_REF (dr), 0);
185 fprintf (outf, " base_object: ");
186 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
188 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
190 fprintf (outf, " Access function %d: ", i);
191 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
193 fprintf (outf, ")\n");
196 /* Dumps the affine function described by FN to the file OUTF. */
199 dump_affine_function (FILE *outf, affine_fn fn)
204 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
205 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
207 fprintf (outf, " + ");
208 print_generic_expr (outf, coef, TDF_SLIM);
209 fprintf (outf, " * x_%u", i);
213 /* Dumps the conflict function CF to the file OUTF. */
216 dump_conflict_function (FILE *outf, conflict_function *cf)
220 if (cf->n == NO_DEPENDENCE)
221 fprintf (outf, "no dependence\n");
222 else if (cf->n == NOT_KNOWN)
223 fprintf (outf, "not known\n");
226 for (i = 0; i < cf->n; i++)
229 dump_affine_function (outf, cf->fns[i]);
230 fprintf (outf, "]\n");
235 /* Dump function for a SUBSCRIPT structure. */
238 dump_subscript (FILE *outf, struct subscript *subscript)
240 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
242 fprintf (outf, "\n (subscript \n");
243 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
244 dump_conflict_function (outf, cf);
245 if (CF_NONTRIVIAL_P (cf))
247 tree last_iteration = SUB_LAST_CONFLICT (subscript);
248 fprintf (outf, " last_conflict: ");
249 print_generic_stmt (outf, last_iteration, 0);
252 cf = SUB_CONFLICTS_IN_B (subscript);
253 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
254 dump_conflict_function (outf, cf);
255 if (CF_NONTRIVIAL_P (cf))
257 tree last_iteration = SUB_LAST_CONFLICT (subscript);
258 fprintf (outf, " last_conflict: ");
259 print_generic_stmt (outf, last_iteration, 0);
262 fprintf (outf, " (Subscript distance: ");
263 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
264 fprintf (outf, " )\n");
265 fprintf (outf, " )\n");
268 /* Print the classic direction vector DIRV to OUTF. */
271 print_direction_vector (FILE *outf,
277 for (eq = 0; eq < length; eq++)
279 enum data_dependence_direction dir = dirv[eq];
284 fprintf (outf, " +");
287 fprintf (outf, " -");
290 fprintf (outf, " =");
292 case dir_positive_or_equal:
293 fprintf (outf, " +=");
295 case dir_positive_or_negative:
296 fprintf (outf, " +-");
298 case dir_negative_or_equal:
299 fprintf (outf, " -=");
302 fprintf (outf, " *");
305 fprintf (outf, "indep");
309 fprintf (outf, "\n");
312 /* Print a vector of direction vectors. */
315 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
321 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
322 print_direction_vector (outf, v, length);
325 /* Print a vector of distance vectors. */
328 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
334 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
335 print_lambda_vector (outf, v, length);
341 debug_data_dependence_relation (struct data_dependence_relation *ddr)
343 dump_data_dependence_relation (stderr, ddr);
346 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
349 dump_data_dependence_relation (FILE *outf,
350 struct data_dependence_relation *ddr)
352 struct data_reference *dra, *drb;
356 fprintf (outf, "(Data Dep: \n");
357 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
358 fprintf (outf, " (don't know)\n");
360 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
361 fprintf (outf, " (no dependence)\n");
363 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
368 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
370 fprintf (outf, " access_fn_A: ");
371 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
372 fprintf (outf, " access_fn_B: ");
373 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
374 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
377 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
378 fprintf (outf, " loop nest: (");
379 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
380 fprintf (outf, "%d ", loopi->num);
381 fprintf (outf, ")\n");
383 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
385 fprintf (outf, " distance_vector: ");
386 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
390 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
392 fprintf (outf, " direction_vector: ");
393 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
398 fprintf (outf, ")\n");
401 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
404 dump_data_dependence_direction (FILE *file,
405 enum data_dependence_direction dir)
421 case dir_positive_or_negative:
422 fprintf (file, "+-");
425 case dir_positive_or_equal:
426 fprintf (file, "+=");
429 case dir_negative_or_equal:
430 fprintf (file, "-=");
442 /* Dumps the distance and direction vectors in FILE. DDRS contains
443 the dependence relations, and VECT_SIZE is the size of the
444 dependence vectors, or in other words the number of loops in the
448 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
451 struct data_dependence_relation *ddr;
454 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
455 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
457 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
459 fprintf (file, "DISTANCE_V (");
460 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
461 fprintf (file, ")\n");
464 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
466 fprintf (file, "DIRECTION_V (");
467 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
468 fprintf (file, ")\n");
472 fprintf (file, "\n\n");
475 /* Dumps the data dependence relations DDRS in FILE. */
478 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
481 struct data_dependence_relation *ddr;
483 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
484 dump_data_dependence_relation (file, ddr);
486 fprintf (file, "\n\n");
489 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
490 will be ssizetype. */
493 split_constant_offset (tree exp, tree *var, tree *off)
495 tree type = TREE_TYPE (exp), otype;
501 otype = TREE_TYPE (exp);
503 switch (TREE_CODE (exp))
506 *var = build_int_cst (type, 0);
507 *off = fold_convert (ssizetype, exp);
512 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
513 split_constant_offset (TREE_OPERAND (exp, 1), &var1, &off1);
514 *var = fold_convert (type, fold_build2 (TREE_CODE (exp), otype,
516 *off = size_binop (TREE_CODE (exp), off0, off1);
520 off1 = TREE_OPERAND (exp, 1);
521 if (TREE_CODE (off1) != INTEGER_CST)
524 split_constant_offset (TREE_OPERAND (exp, 0), &var0, &off0);
525 *var = fold_convert (type, fold_build2 (MULT_EXPR, otype,
527 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, off1));
532 tree op, base, poffset;
533 HOST_WIDE_INT pbitsize, pbitpos;
534 enum machine_mode pmode;
535 int punsignedp, pvolatilep;
537 op = TREE_OPERAND (exp, 0);
538 if (!handled_component_p (op))
541 base = get_inner_reference (op, &pbitsize, &pbitpos, &poffset,
542 &pmode, &punsignedp, &pvolatilep, false);
544 if (pbitpos % BITS_PER_UNIT != 0)
546 base = build_fold_addr_expr (base);
547 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
551 split_constant_offset (poffset, &poffset, &off1);
552 off0 = size_binop (PLUS_EXPR, off0, off1);
553 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base),
555 fold_convert (TREE_TYPE (base), poffset));
558 *var = fold_convert (type, base);
567 *off = ssize_int (0);
570 /* Returns the address ADDR of an object in a canonical shape (without nop
571 casts, and with type of pointer to the object). */
574 canonicalize_base_object_address (tree addr)
580 /* The base address may be obtained by casting from integer, in that case
582 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
585 if (TREE_CODE (addr) != ADDR_EXPR)
588 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
591 /* Analyzes the behavior of the memory reference DR in the innermost loop that
595 dr_analyze_innermost (struct data_reference *dr)
597 tree stmt = DR_STMT (dr);
598 struct loop *loop = loop_containing_stmt (stmt);
599 tree ref = DR_REF (dr);
600 HOST_WIDE_INT pbitsize, pbitpos;
602 enum machine_mode pmode;
603 int punsignedp, pvolatilep;
604 affine_iv base_iv, offset_iv;
605 tree init, dinit, step;
607 if (dump_file && (dump_flags & TDF_DETAILS))
608 fprintf (dump_file, "analyze_innermost: ");
610 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
611 &pmode, &punsignedp, &pvolatilep, false);
612 gcc_assert (base != NULL_TREE);
614 if (pbitpos % BITS_PER_UNIT != 0)
616 if (dump_file && (dump_flags & TDF_DETAILS))
617 fprintf (dump_file, "failed: bit offset alignment.\n");
621 base = build_fold_addr_expr (base);
622 if (!simple_iv (loop, stmt, base, &base_iv, false))
624 if (dump_file && (dump_flags & TDF_DETAILS))
625 fprintf (dump_file, "failed: evolution of base is not affine.\n");
630 offset_iv.base = ssize_int (0);
631 offset_iv.step = ssize_int (0);
633 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
635 if (dump_file && (dump_flags & TDF_DETAILS))
636 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
640 init = ssize_int (pbitpos / BITS_PER_UNIT);
641 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
642 init = size_binop (PLUS_EXPR, init, dinit);
643 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
644 init = size_binop (PLUS_EXPR, init, dinit);
646 step = size_binop (PLUS_EXPR,
647 fold_convert (ssizetype, base_iv.step),
648 fold_convert (ssizetype, offset_iv.step));
650 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
652 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
656 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
658 if (dump_file && (dump_flags & TDF_DETAILS))
659 fprintf (dump_file, "success.\n");
662 /* Determines the base object and the list of indices of memory reference
663 DR, analyzed in loop nest NEST. */
666 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
668 tree stmt = DR_STMT (dr);
669 struct loop *loop = loop_containing_stmt (stmt);
670 VEC (tree, heap) *access_fns = NULL;
671 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
672 tree base, off, access_fn;
674 while (handled_component_p (aref))
676 if (TREE_CODE (aref) == ARRAY_REF)
678 op = TREE_OPERAND (aref, 1);
679 access_fn = analyze_scalar_evolution (loop, op);
680 access_fn = resolve_mixers (nest, access_fn);
681 VEC_safe_push (tree, heap, access_fns, access_fn);
683 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
686 aref = TREE_OPERAND (aref, 0);
689 if (INDIRECT_REF_P (aref))
691 op = TREE_OPERAND (aref, 0);
692 access_fn = analyze_scalar_evolution (loop, op);
693 access_fn = resolve_mixers (nest, access_fn);
694 base = initial_condition (access_fn);
695 split_constant_offset (base, &base, &off);
696 access_fn = chrec_replace_initial_condition (access_fn,
697 fold_convert (TREE_TYPE (base), off));
699 TREE_OPERAND (aref, 0) = base;
700 VEC_safe_push (tree, heap, access_fns, access_fn);
703 DR_BASE_OBJECT (dr) = ref;
704 DR_ACCESS_FNS (dr) = access_fns;
707 /* Extracts the alias analysis information from the memory reference DR. */
710 dr_analyze_alias (struct data_reference *dr)
712 tree stmt = DR_STMT (dr);
713 tree ref = DR_REF (dr);
714 tree base = get_base_address (ref), addr, smt = NULL_TREE;
721 else if (INDIRECT_REF_P (base))
723 addr = TREE_OPERAND (base, 0);
724 if (TREE_CODE (addr) == SSA_NAME)
726 smt = symbol_mem_tag (SSA_NAME_VAR (addr));
727 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
731 DR_SYMBOL_TAG (dr) = smt;
732 if (var_can_have_subvars (smt))
733 DR_SUBVARS (dr) = get_subvars_for_var (smt);
735 vops = BITMAP_ALLOC (NULL);
736 FOR_EACH_SSA_TREE_OPERAND (op, stmt, it, SSA_OP_VIRTUAL_USES)
738 bitmap_set_bit (vops, DECL_UID (SSA_NAME_VAR (op)));
744 /* Returns true if the address of DR is invariant. */
747 dr_address_invariant_p (struct data_reference *dr)
752 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
753 if (tree_contains_chrecs (idx, NULL))
759 /* Frees data reference DR. */
762 free_data_ref (data_reference_p dr)
764 BITMAP_FREE (DR_VOPS (dr));
765 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
769 /* Analyzes memory reference MEMREF accessed in STMT. The reference
770 is read if IS_READ is true, write otherwise. Returns the
771 data_reference description of MEMREF. NEST is the outermost loop of the
772 loop nest in that the reference should be analysed. */
774 struct data_reference *
775 create_data_ref (struct loop *nest, tree memref, tree stmt, bool is_read)
777 struct data_reference *dr;
779 if (dump_file && (dump_flags & TDF_DETAILS))
781 fprintf (dump_file, "Creating dr for ");
782 print_generic_expr (dump_file, memref, TDF_SLIM);
783 fprintf (dump_file, "\n");
786 dr = XCNEW (struct data_reference);
788 DR_REF (dr) = memref;
789 DR_IS_READ (dr) = is_read;
791 dr_analyze_innermost (dr);
792 dr_analyze_indices (dr, nest);
793 dr_analyze_alias (dr);
795 if (dump_file && (dump_flags & TDF_DETAILS))
797 fprintf (dump_file, "\tbase_address: ");
798 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
799 fprintf (dump_file, "\n\toffset from base address: ");
800 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
801 fprintf (dump_file, "\n\tconstant offset from base address: ");
802 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
803 fprintf (dump_file, "\n\tstep: ");
804 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
805 fprintf (dump_file, "\n\taligned to: ");
806 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
807 fprintf (dump_file, "\n\tbase_object: ");
808 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
809 fprintf (dump_file, "\n\tsymbol tag: ");
810 print_generic_expr (dump_file, DR_SYMBOL_TAG (dr), TDF_SLIM);
811 fprintf (dump_file, "\n");
817 /* Returns true if FNA == FNB. */
820 affine_function_equal_p (affine_fn fna, affine_fn fnb)
822 unsigned i, n = VEC_length (tree, fna);
824 if (n != VEC_length (tree, fnb))
827 for (i = 0; i < n; i++)
828 if (!operand_equal_p (VEC_index (tree, fna, i),
829 VEC_index (tree, fnb, i), 0))
835 /* If all the functions in CF are the same, returns one of them,
836 otherwise returns NULL. */
839 common_affine_function (conflict_function *cf)
844 if (!CF_NONTRIVIAL_P (cf))
849 for (i = 1; i < cf->n; i++)
850 if (!affine_function_equal_p (comm, cf->fns[i]))
856 /* Returns the base of the affine function FN. */
859 affine_function_base (affine_fn fn)
861 return VEC_index (tree, fn, 0);
864 /* Returns true if FN is a constant. */
867 affine_function_constant_p (affine_fn fn)
872 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
873 if (!integer_zerop (coef))
879 /* Returns true if FN is the zero constant function. */
882 affine_function_zero_p (affine_fn fn)
884 return (integer_zerop (affine_function_base (fn))
885 && affine_function_constant_p (fn));
888 /* Applies operation OP on affine functions FNA and FNB, and returns the
892 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
898 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
900 n = VEC_length (tree, fna);
901 m = VEC_length (tree, fnb);
905 n = VEC_length (tree, fnb);
906 m = VEC_length (tree, fna);
909 ret = VEC_alloc (tree, heap, m);
910 for (i = 0; i < n; i++)
911 VEC_quick_push (tree, ret,
912 fold_build2 (op, integer_type_node,
913 VEC_index (tree, fna, i),
914 VEC_index (tree, fnb, i)));
916 for (; VEC_iterate (tree, fna, i, coef); i++)
917 VEC_quick_push (tree, ret,
918 fold_build2 (op, integer_type_node,
919 coef, integer_zero_node));
920 for (; VEC_iterate (tree, fnb, i, coef); i++)
921 VEC_quick_push (tree, ret,
922 fold_build2 (op, integer_type_node,
923 integer_zero_node, coef));
928 /* Returns the sum of affine functions FNA and FNB. */
931 affine_fn_plus (affine_fn fna, affine_fn fnb)
933 return affine_fn_op (PLUS_EXPR, fna, fnb);
936 /* Returns the difference of affine functions FNA and FNB. */
939 affine_fn_minus (affine_fn fna, affine_fn fnb)
941 return affine_fn_op (MINUS_EXPR, fna, fnb);
944 /* Frees affine function FN. */
947 affine_fn_free (affine_fn fn)
949 VEC_free (tree, heap, fn);
952 /* Determine for each subscript in the data dependence relation DDR
956 compute_subscript_distance (struct data_dependence_relation *ddr)
958 conflict_function *cf_a, *cf_b;
959 affine_fn fn_a, fn_b, diff;
961 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
965 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
967 struct subscript *subscript;
969 subscript = DDR_SUBSCRIPT (ddr, i);
970 cf_a = SUB_CONFLICTS_IN_A (subscript);
971 cf_b = SUB_CONFLICTS_IN_B (subscript);
973 fn_a = common_affine_function (cf_a);
974 fn_b = common_affine_function (cf_b);
977 SUB_DISTANCE (subscript) = chrec_dont_know;
980 diff = affine_fn_minus (fn_a, fn_b);
982 if (affine_function_constant_p (diff))
983 SUB_DISTANCE (subscript) = affine_function_base (diff);
985 SUB_DISTANCE (subscript) = chrec_dont_know;
987 affine_fn_free (diff);
992 /* Returns the conflict function for "unknown". */
994 static conflict_function *
995 conflict_fn_not_known (void)
997 conflict_function *fn = XCNEW (conflict_function);
1003 /* Returns the conflict function for "independent". */
1005 static conflict_function *
1006 conflict_fn_no_dependence (void)
1008 conflict_function *fn = XCNEW (conflict_function);
1009 fn->n = NO_DEPENDENCE;
1014 /* Returns true if the address of OBJ is invariant in LOOP. */
1017 object_address_invariant_in_loop_p (struct loop *loop, tree obj)
1019 while (handled_component_p (obj))
1021 if (TREE_CODE (obj) == ARRAY_REF)
1023 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1024 need to check the stride and the lower bound of the reference. */
1025 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1027 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1031 else if (TREE_CODE (obj) == COMPONENT_REF)
1033 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1037 obj = TREE_OPERAND (obj, 0);
1040 if (!INDIRECT_REF_P (obj))
1043 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1047 /* Returns true if A and B are accesses to different objects, or to different
1048 fields of the same object. */
1051 disjoint_objects_p (tree a, tree b)
1053 tree base_a, base_b;
1054 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1057 base_a = get_base_address (a);
1058 base_b = get_base_address (b);
1062 && base_a != base_b)
1065 if (!operand_equal_p (base_a, base_b, 0))
1068 /* Compare the component references of A and B. We must start from the inner
1069 ones, so record them to the vector first. */
1070 while (handled_component_p (a))
1072 VEC_safe_push (tree, heap, comp_a, a);
1073 a = TREE_OPERAND (a, 0);
1075 while (handled_component_p (b))
1077 VEC_safe_push (tree, heap, comp_b, b);
1078 b = TREE_OPERAND (b, 0);
1084 if (VEC_length (tree, comp_a) == 0
1085 || VEC_length (tree, comp_b) == 0)
1088 a = VEC_pop (tree, comp_a);
1089 b = VEC_pop (tree, comp_b);
1091 /* Real and imaginary part of a variable do not alias. */
1092 if ((TREE_CODE (a) == REALPART_EXPR
1093 && TREE_CODE (b) == IMAGPART_EXPR)
1094 || (TREE_CODE (a) == IMAGPART_EXPR
1095 && TREE_CODE (b) == REALPART_EXPR))
1101 if (TREE_CODE (a) != TREE_CODE (b))
1104 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1105 DR_BASE_OBJECT are always zero. */
1106 if (TREE_CODE (a) == ARRAY_REF)
1108 else if (TREE_CODE (a) == COMPONENT_REF)
1110 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1113 /* Different fields of unions may overlap. */
1114 base_a = TREE_OPERAND (a, 0);
1115 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1118 /* Different fields of structures cannot. */
1126 VEC_free (tree, heap, comp_a);
1127 VEC_free (tree, heap, comp_b);
1132 /* Returns false if we can prove that data references A and B do not alias,
1136 dr_may_alias_p (struct data_reference *a, struct data_reference *b)
1138 tree addr_a = DR_BASE_ADDRESS (a);
1139 tree addr_b = DR_BASE_ADDRESS (b);
1140 tree type_a, type_b;
1141 tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1143 /* If the sets of virtual operands are disjoint, the memory references do not
1145 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
1148 /* If the accessed objects are disjoint, the memory references do not
1150 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1153 if (!addr_a || !addr_b)
1156 /* If the references are based on different static objects, they cannot alias
1157 (PTA should be able to disambiguate such accesses, but often it fails to,
1158 since currently we cannot distinguish between pointer and offset in pointer
1160 if (TREE_CODE (addr_a) == ADDR_EXPR
1161 && TREE_CODE (addr_b) == ADDR_EXPR)
1162 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1164 /* An instruction writing through a restricted pointer is "independent" of any
1165 instruction reading or writing through a different restricted pointer,
1166 in the same block/scope. */
1168 type_a = TREE_TYPE (addr_a);
1169 type_b = TREE_TYPE (addr_b);
1170 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1172 if (TREE_CODE (addr_a) == SSA_NAME)
1173 decl_a = SSA_NAME_VAR (addr_a);
1174 if (TREE_CODE (addr_b) == SSA_NAME)
1175 decl_b = SSA_NAME_VAR (addr_b);
1177 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1178 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1179 && decl_a && TREE_CODE (decl_a) == PARM_DECL
1180 && decl_b && TREE_CODE (decl_b) == PARM_DECL
1181 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1182 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1188 /* Initialize a data dependence relation between data accesses A and
1189 B. NB_LOOPS is the number of loops surrounding the references: the
1190 size of the classic distance/direction vectors. */
1192 static struct data_dependence_relation *
1193 initialize_data_dependence_relation (struct data_reference *a,
1194 struct data_reference *b,
1195 VEC (loop_p, heap) *loop_nest)
1197 struct data_dependence_relation *res;
1200 res = XNEW (struct data_dependence_relation);
1203 DDR_LOOP_NEST (res) = NULL;
1205 if (a == NULL || b == NULL)
1207 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1211 /* If the data references do not alias, then they are independent. */
1212 if (!dr_may_alias_p (a, b))
1214 DDR_ARE_DEPENDENT (res) = chrec_known;
1218 /* If the references do not access the same object, we do not know
1219 whether they alias or not. */
1220 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1222 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1226 /* If the base of the object is not invariant in the loop nest, we cannot
1227 analyse it. TODO -- in fact, it would suffice to record that there may
1228 be arbitrary dependences in the loops where the base object varies. */
1229 if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1230 DR_BASE_OBJECT (a)))
1232 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1236 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1238 DDR_AFFINE_P (res) = true;
1239 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1240 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1241 DDR_LOOP_NEST (res) = loop_nest;
1242 DDR_INNER_LOOP (res) = 0;
1243 DDR_DIR_VECTS (res) = NULL;
1244 DDR_DIST_VECTS (res) = NULL;
1246 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1248 struct subscript *subscript;
1250 subscript = XNEW (struct subscript);
1251 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1252 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1253 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1254 SUB_DISTANCE (subscript) = chrec_dont_know;
1255 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1261 /* Frees memory used by the conflict function F. */
1264 free_conflict_function (conflict_function *f)
1268 if (CF_NONTRIVIAL_P (f))
1270 for (i = 0; i < f->n; i++)
1271 affine_fn_free (f->fns[i]);
1276 /* Frees memory used by SUBSCRIPTS. */
1279 free_subscripts (VEC (subscript_p, heap) *subscripts)
1284 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1286 free_conflict_function (s->conflicting_iterations_in_a);
1287 free_conflict_function (s->conflicting_iterations_in_b);
1289 VEC_free (subscript_p, heap, subscripts);
1292 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1296 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1299 if (dump_file && (dump_flags & TDF_DETAILS))
1301 fprintf (dump_file, "(dependence classified: ");
1302 print_generic_expr (dump_file, chrec, 0);
1303 fprintf (dump_file, ")\n");
1306 DDR_ARE_DEPENDENT (ddr) = chrec;
1307 free_subscripts (DDR_SUBSCRIPTS (ddr));
1310 /* The dependence relation DDR cannot be represented by a distance
1314 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1316 if (dump_file && (dump_flags & TDF_DETAILS))
1317 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1319 DDR_AFFINE_P (ddr) = false;
1324 /* This section contains the classic Banerjee tests. */
1326 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1327 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1330 ziv_subscript_p (tree chrec_a,
1333 return (evolution_function_is_constant_p (chrec_a)
1334 && evolution_function_is_constant_p (chrec_b));
1337 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1338 variable, i.e., if the SIV (Single Index Variable) test is true. */
1341 siv_subscript_p (tree chrec_a,
1344 if ((evolution_function_is_constant_p (chrec_a)
1345 && evolution_function_is_univariate_p (chrec_b))
1346 || (evolution_function_is_constant_p (chrec_b)
1347 && evolution_function_is_univariate_p (chrec_a)))
1350 if (evolution_function_is_univariate_p (chrec_a)
1351 && evolution_function_is_univariate_p (chrec_b))
1353 switch (TREE_CODE (chrec_a))
1355 case POLYNOMIAL_CHREC:
1356 switch (TREE_CODE (chrec_b))
1358 case POLYNOMIAL_CHREC:
1359 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1374 /* Creates a conflict function with N dimensions. The affine functions
1375 in each dimension follow. */
1377 static conflict_function *
1378 conflict_fn (unsigned n, ...)
1381 conflict_function *ret = XCNEW (conflict_function);
1384 gcc_assert (0 < n && n <= MAX_DIM);
1388 for (i = 0; i < n; i++)
1389 ret->fns[i] = va_arg (ap, affine_fn);
1395 /* Returns constant affine function with value CST. */
1398 affine_fn_cst (tree cst)
1400 affine_fn fn = VEC_alloc (tree, heap, 1);
1401 VEC_quick_push (tree, fn, cst);
1405 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1408 affine_fn_univar (tree cst, unsigned dim, tree coef)
1410 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1413 gcc_assert (dim > 0);
1414 VEC_quick_push (tree, fn, cst);
1415 for (i = 1; i < dim; i++)
1416 VEC_quick_push (tree, fn, integer_zero_node);
1417 VEC_quick_push (tree, fn, coef);
1421 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1422 *OVERLAPS_B are initialized to the functions that describe the
1423 relation between the elements accessed twice by CHREC_A and
1424 CHREC_B. For k >= 0, the following property is verified:
1426 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1429 analyze_ziv_subscript (tree chrec_a,
1431 conflict_function **overlaps_a,
1432 conflict_function **overlaps_b,
1433 tree *last_conflicts)
1436 dependence_stats.num_ziv++;
1438 if (dump_file && (dump_flags & TDF_DETAILS))
1439 fprintf (dump_file, "(analyze_ziv_subscript \n");
1441 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1442 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1443 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
1445 switch (TREE_CODE (difference))
1448 if (integer_zerop (difference))
1450 /* The difference is equal to zero: the accessed index
1451 overlaps for each iteration in the loop. */
1452 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1453 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1454 *last_conflicts = chrec_dont_know;
1455 dependence_stats.num_ziv_dependent++;
1459 /* The accesses do not overlap. */
1460 *overlaps_a = conflict_fn_no_dependence ();
1461 *overlaps_b = conflict_fn_no_dependence ();
1462 *last_conflicts = integer_zero_node;
1463 dependence_stats.num_ziv_independent++;
1468 /* We're not sure whether the indexes overlap. For the moment,
1469 conservatively answer "don't know". */
1470 if (dump_file && (dump_flags & TDF_DETAILS))
1471 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1473 *overlaps_a = conflict_fn_not_known ();
1474 *overlaps_b = conflict_fn_not_known ();
1475 *last_conflicts = chrec_dont_know;
1476 dependence_stats.num_ziv_unimplemented++;
1480 if (dump_file && (dump_flags & TDF_DETAILS))
1481 fprintf (dump_file, ")\n");
1484 /* Sets NIT to the estimated number of executions of the statements in
1485 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1486 large as the number of iterations. If we have no reliable estimate,
1487 the function returns false, otherwise returns true. */
1490 estimated_loop_iterations (struct loop *loop, bool conservative,
1493 estimate_numbers_of_iterations_loop (loop);
1496 if (!loop->any_upper_bound)
1499 *nit = loop->nb_iterations_upper_bound;
1503 if (!loop->any_estimate)
1506 *nit = loop->nb_iterations_estimate;
1512 /* Similar to estimated_loop_iterations, but returns the estimate only
1513 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1514 on the number of iterations of LOOP could not be derived, returns -1. */
1517 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1520 HOST_WIDE_INT hwi_nit;
1522 if (!estimated_loop_iterations (loop, conservative, &nit))
1525 if (!double_int_fits_in_shwi_p (nit))
1527 hwi_nit = double_int_to_shwi (nit);
1529 return hwi_nit < 0 ? -1 : hwi_nit;
1532 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1533 and only if it fits to the int type. If this is not the case, or the
1534 estimate on the number of iterations of LOOP could not be derived, returns
1538 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1543 if (!estimated_loop_iterations (loop, conservative, &nit))
1544 return chrec_dont_know;
1546 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1547 if (!double_int_fits_to_tree_p (type, nit))
1548 return chrec_dont_know;
1550 return double_int_to_tree (type, nit);
1553 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1554 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1555 *OVERLAPS_B are initialized to the functions that describe the
1556 relation between the elements accessed twice by CHREC_A and
1557 CHREC_B. For k >= 0, the following property is verified:
1559 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1562 analyze_siv_subscript_cst_affine (tree chrec_a,
1564 conflict_function **overlaps_a,
1565 conflict_function **overlaps_b,
1566 tree *last_conflicts)
1568 bool value0, value1, value2;
1569 tree difference, tmp;
1571 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
1572 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
1573 difference = chrec_fold_minus
1574 (integer_type_node, initial_condition (chrec_b), chrec_a);
1576 if (!chrec_is_positive (initial_condition (difference), &value0))
1578 if (dump_file && (dump_flags & TDF_DETAILS))
1579 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1581 dependence_stats.num_siv_unimplemented++;
1582 *overlaps_a = conflict_fn_not_known ();
1583 *overlaps_b = conflict_fn_not_known ();
1584 *last_conflicts = chrec_dont_know;
1589 if (value0 == false)
1591 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1593 if (dump_file && (dump_flags & TDF_DETAILS))
1594 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1596 *overlaps_a = conflict_fn_not_known ();
1597 *overlaps_b = conflict_fn_not_known ();
1598 *last_conflicts = chrec_dont_know;
1599 dependence_stats.num_siv_unimplemented++;
1608 chrec_b = {10, +, 1}
1611 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1613 HOST_WIDE_INT numiter;
1614 struct loop *loop = get_chrec_loop (chrec_b);
1616 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1617 tmp = fold_build2 (EXACT_DIV_EXPR, integer_type_node,
1618 fold_build1 (ABS_EXPR,
1621 CHREC_RIGHT (chrec_b));
1622 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1623 *last_conflicts = integer_one_node;
1626 /* Perform weak-zero siv test to see if overlap is
1627 outside the loop bounds. */
1628 numiter = estimated_loop_iterations_int (loop, true);
1631 && compare_tree_int (tmp, numiter) > 0)
1633 free_conflict_function (*overlaps_a);
1634 free_conflict_function (*overlaps_b);
1635 *overlaps_a = conflict_fn_no_dependence ();
1636 *overlaps_b = conflict_fn_no_dependence ();
1637 *last_conflicts = integer_zero_node;
1638 dependence_stats.num_siv_independent++;
1641 dependence_stats.num_siv_dependent++;
1645 /* When the step does not divide the difference, there are
1649 *overlaps_a = conflict_fn_no_dependence ();
1650 *overlaps_b = conflict_fn_no_dependence ();
1651 *last_conflicts = integer_zero_node;
1652 dependence_stats.num_siv_independent++;
1661 chrec_b = {10, +, -1}
1663 In this case, chrec_a will not overlap with chrec_b. */
1664 *overlaps_a = conflict_fn_no_dependence ();
1665 *overlaps_b = conflict_fn_no_dependence ();
1666 *last_conflicts = integer_zero_node;
1667 dependence_stats.num_siv_independent++;
1674 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1676 if (dump_file && (dump_flags & TDF_DETAILS))
1677 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1679 *overlaps_a = conflict_fn_not_known ();
1680 *overlaps_b = conflict_fn_not_known ();
1681 *last_conflicts = chrec_dont_know;
1682 dependence_stats.num_siv_unimplemented++;
1687 if (value2 == false)
1691 chrec_b = {10, +, -1}
1693 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1695 HOST_WIDE_INT numiter;
1696 struct loop *loop = get_chrec_loop (chrec_b);
1698 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1699 tmp = fold_build2 (EXACT_DIV_EXPR,
1700 integer_type_node, difference,
1701 CHREC_RIGHT (chrec_b));
1702 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1703 *last_conflicts = integer_one_node;
1705 /* Perform weak-zero siv test to see if overlap is
1706 outside the loop bounds. */
1707 numiter = estimated_loop_iterations_int (loop, true);
1710 && compare_tree_int (tmp, numiter) > 0)
1712 free_conflict_function (*overlaps_a);
1713 free_conflict_function (*overlaps_b);
1714 *overlaps_a = conflict_fn_no_dependence ();
1715 *overlaps_b = conflict_fn_no_dependence ();
1716 *last_conflicts = integer_zero_node;
1717 dependence_stats.num_siv_independent++;
1720 dependence_stats.num_siv_dependent++;
1724 /* When the step does not divide the difference, there
1728 *overlaps_a = conflict_fn_no_dependence ();
1729 *overlaps_b = conflict_fn_no_dependence ();
1730 *last_conflicts = integer_zero_node;
1731 dependence_stats.num_siv_independent++;
1741 In this case, chrec_a will not overlap with chrec_b. */
1742 *overlaps_a = conflict_fn_no_dependence ();
1743 *overlaps_b = conflict_fn_no_dependence ();
1744 *last_conflicts = integer_zero_node;
1745 dependence_stats.num_siv_independent++;
1753 /* Helper recursive function for initializing the matrix A. Returns
1754 the initial value of CHREC. */
1757 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1761 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
1762 return int_cst_value (chrec);
1764 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1765 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1768 #define FLOOR_DIV(x,y) ((x) / (y))
1770 /* Solves the special case of the Diophantine equation:
1771 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1773 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1774 number of iterations that loops X and Y run. The overlaps will be
1775 constructed as evolutions in dimension DIM. */
1778 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1779 affine_fn *overlaps_a,
1780 affine_fn *overlaps_b,
1781 tree *last_conflicts, int dim)
1783 if (((step_a > 0 && step_b > 0)
1784 || (step_a < 0 && step_b < 0)))
1786 int step_overlaps_a, step_overlaps_b;
1787 int gcd_steps_a_b, last_conflict, tau2;
1789 gcd_steps_a_b = gcd (step_a, step_b);
1790 step_overlaps_a = step_b / gcd_steps_a_b;
1791 step_overlaps_b = step_a / gcd_steps_a_b;
1793 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1794 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1795 last_conflict = tau2;
1797 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1798 build_int_cst (NULL_TREE,
1800 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1801 build_int_cst (NULL_TREE,
1803 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1808 *overlaps_a = affine_fn_cst (integer_zero_node);
1809 *overlaps_b = affine_fn_cst (integer_zero_node);
1810 *last_conflicts = integer_zero_node;
1814 /* Solves the special case of a Diophantine equation where CHREC_A is
1815 an affine bivariate function, and CHREC_B is an affine univariate
1816 function. For example,
1818 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1820 has the following overlapping functions:
1822 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1823 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1824 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1826 FORNOW: This is a specialized implementation for a case occurring in
1827 a common benchmark. Implement the general algorithm. */
1830 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1831 conflict_function **overlaps_a,
1832 conflict_function **overlaps_b,
1833 tree *last_conflicts)
1835 bool xz_p, yz_p, xyz_p;
1836 int step_x, step_y, step_z;
1837 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1838 affine_fn overlaps_a_xz, overlaps_b_xz;
1839 affine_fn overlaps_a_yz, overlaps_b_yz;
1840 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1841 affine_fn ova1, ova2, ovb;
1842 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1844 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1845 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1846 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1848 niter_x = estimated_loop_iterations_int
1849 (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
1850 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), true);
1851 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), true);
1853 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1855 if (dump_file && (dump_flags & TDF_DETAILS))
1856 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1858 *overlaps_a = conflict_fn_not_known ();
1859 *overlaps_b = conflict_fn_not_known ();
1860 *last_conflicts = chrec_dont_know;
1864 niter = MIN (niter_x, niter_z);
1865 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1868 &last_conflicts_xz, 1);
1869 niter = MIN (niter_y, niter_z);
1870 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1873 &last_conflicts_yz, 2);
1874 niter = MIN (niter_x, niter_z);
1875 niter = MIN (niter_y, niter);
1876 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1879 &last_conflicts_xyz, 3);
1881 xz_p = !integer_zerop (last_conflicts_xz);
1882 yz_p = !integer_zerop (last_conflicts_yz);
1883 xyz_p = !integer_zerop (last_conflicts_xyz);
1885 if (xz_p || yz_p || xyz_p)
1887 ova1 = affine_fn_cst (integer_zero_node);
1888 ova2 = affine_fn_cst (integer_zero_node);
1889 ovb = affine_fn_cst (integer_zero_node);
1892 affine_fn t0 = ova1;
1895 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
1896 ovb = affine_fn_plus (ovb, overlaps_b_xz);
1897 affine_fn_free (t0);
1898 affine_fn_free (t2);
1899 *last_conflicts = last_conflicts_xz;
1903 affine_fn t0 = ova2;
1906 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
1907 ovb = affine_fn_plus (ovb, overlaps_b_yz);
1908 affine_fn_free (t0);
1909 affine_fn_free (t2);
1910 *last_conflicts = last_conflicts_yz;
1914 affine_fn t0 = ova1;
1915 affine_fn t2 = ova2;
1918 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
1919 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
1920 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
1921 affine_fn_free (t0);
1922 affine_fn_free (t2);
1923 affine_fn_free (t4);
1924 *last_conflicts = last_conflicts_xyz;
1926 *overlaps_a = conflict_fn (2, ova1, ova2);
1927 *overlaps_b = conflict_fn (1, ovb);
1931 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1932 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1933 *last_conflicts = integer_zero_node;
1936 affine_fn_free (overlaps_a_xz);
1937 affine_fn_free (overlaps_b_xz);
1938 affine_fn_free (overlaps_a_yz);
1939 affine_fn_free (overlaps_b_yz);
1940 affine_fn_free (overlaps_a_xyz);
1941 affine_fn_free (overlaps_b_xyz);
1944 /* Determines the overlapping elements due to accesses CHREC_A and
1945 CHREC_B, that are affine functions. This function cannot handle
1946 symbolic evolution functions, ie. when initial conditions are
1947 parameters, because it uses lambda matrices of integers. */
1950 analyze_subscript_affine_affine (tree chrec_a,
1952 conflict_function **overlaps_a,
1953 conflict_function **overlaps_b,
1954 tree *last_conflicts)
1956 unsigned nb_vars_a, nb_vars_b, dim;
1957 int init_a, init_b, gamma, gcd_alpha_beta;
1959 lambda_matrix A, U, S;
1961 if (eq_evolutions_p (chrec_a, chrec_b))
1963 /* The accessed index overlaps for each iteration in the
1965 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1966 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1967 *last_conflicts = chrec_dont_know;
1970 if (dump_file && (dump_flags & TDF_DETAILS))
1971 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
1973 /* For determining the initial intersection, we have to solve a
1974 Diophantine equation. This is the most time consuming part.
1976 For answering to the question: "Is there a dependence?" we have
1977 to prove that there exists a solution to the Diophantine
1978 equation, and that the solution is in the iteration domain,
1979 i.e. the solution is positive or zero, and that the solution
1980 happens before the upper bound loop.nb_iterations. Otherwise
1981 there is no dependence. This function outputs a description of
1982 the iterations that hold the intersections. */
1984 nb_vars_a = nb_vars_in_chrec (chrec_a);
1985 nb_vars_b = nb_vars_in_chrec (chrec_b);
1987 dim = nb_vars_a + nb_vars_b;
1988 U = lambda_matrix_new (dim, dim);
1989 A = lambda_matrix_new (dim, 1);
1990 S = lambda_matrix_new (dim, 1);
1992 init_a = initialize_matrix_A (A, chrec_a, 0, 1);
1993 init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
1994 gamma = init_b - init_a;
1996 /* Don't do all the hard work of solving the Diophantine equation
1997 when we already know the solution: for example,
2000 | gamma = 3 - 3 = 0.
2001 Then the first overlap occurs during the first iterations:
2002 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2006 if (nb_vars_a == 1 && nb_vars_b == 1)
2009 HOST_WIDE_INT niter, niter_a, niter_b;
2012 niter_a = estimated_loop_iterations_int
2013 (get_chrec_loop (chrec_a), true);
2014 niter_b = estimated_loop_iterations_int
2015 (get_chrec_loop (chrec_b), true);
2016 if (niter_a < 0 || niter_b < 0)
2018 if (dump_file && (dump_flags & TDF_DETAILS))
2019 fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
2020 *overlaps_a = conflict_fn_not_known ();
2021 *overlaps_b = conflict_fn_not_known ();
2022 *last_conflicts = chrec_dont_know;
2023 goto end_analyze_subs_aa;
2026 niter = MIN (niter_a, niter_b);
2028 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2029 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2031 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2034 *overlaps_a = conflict_fn (1, ova);
2035 *overlaps_b = conflict_fn (1, ovb);
2038 else if (nb_vars_a == 2 && nb_vars_b == 1)
2039 compute_overlap_steps_for_affine_1_2
2040 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2042 else if (nb_vars_a == 1 && nb_vars_b == 2)
2043 compute_overlap_steps_for_affine_1_2
2044 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2048 if (dump_file && (dump_flags & TDF_DETAILS))
2049 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2050 *overlaps_a = conflict_fn_not_known ();
2051 *overlaps_b = conflict_fn_not_known ();
2052 *last_conflicts = chrec_dont_know;
2054 goto end_analyze_subs_aa;
2058 lambda_matrix_right_hermite (A, dim, 1, S, U);
2063 lambda_matrix_row_negate (U, dim, 0);
2065 gcd_alpha_beta = S[0][0];
2067 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2068 but that is a quite strange case. Instead of ICEing, answer
2070 if (gcd_alpha_beta == 0)
2072 *overlaps_a = conflict_fn_not_known ();
2073 *overlaps_b = conflict_fn_not_known ();
2074 *last_conflicts = chrec_dont_know;
2075 goto end_analyze_subs_aa;
2078 /* The classic "gcd-test". */
2079 if (!int_divides_p (gcd_alpha_beta, gamma))
2081 /* The "gcd-test" has determined that there is no integer
2082 solution, i.e. there is no dependence. */
2083 *overlaps_a = conflict_fn_no_dependence ();
2084 *overlaps_b = conflict_fn_no_dependence ();
2085 *last_conflicts = integer_zero_node;
2088 /* Both access functions are univariate. This includes SIV and MIV cases. */
2089 else if (nb_vars_a == 1 && nb_vars_b == 1)
2091 /* Both functions should have the same evolution sign. */
2092 if (((A[0][0] > 0 && -A[1][0] > 0)
2093 || (A[0][0] < 0 && -A[1][0] < 0)))
2095 /* The solutions are given by:
2097 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2100 For a given integer t. Using the following variables,
2102 | i0 = u11 * gamma / gcd_alpha_beta
2103 | j0 = u12 * gamma / gcd_alpha_beta
2110 | y0 = j0 + j1 * t. */
2114 /* X0 and Y0 are the first iterations for which there is a
2115 dependence. X0, Y0 are two solutions of the Diophantine
2116 equation: chrec_a (X0) = chrec_b (Y0). */
2118 int niter, niter_a, niter_b;
2120 niter_a = estimated_loop_iterations_int
2121 (get_chrec_loop (chrec_a), true);
2122 niter_b = estimated_loop_iterations_int
2123 (get_chrec_loop (chrec_b), true);
2125 if (niter_a < 0 || niter_b < 0)
2127 if (dump_file && (dump_flags & TDF_DETAILS))
2128 fprintf (dump_file, "affine-affine test failed: missing iteration counts.\n");
2129 *overlaps_a = conflict_fn_not_known ();
2130 *overlaps_b = conflict_fn_not_known ();
2131 *last_conflicts = chrec_dont_know;
2132 goto end_analyze_subs_aa;
2135 niter = MIN (niter_a, niter_b);
2137 i0 = U[0][0] * gamma / gcd_alpha_beta;
2138 j0 = U[0][1] * gamma / gcd_alpha_beta;
2142 if ((i1 == 0 && i0 < 0)
2143 || (j1 == 0 && j0 < 0))
2145 /* There is no solution.
2146 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2147 falls in here, but for the moment we don't look at the
2148 upper bound of the iteration domain. */
2149 *overlaps_a = conflict_fn_no_dependence ();
2150 *overlaps_b = conflict_fn_no_dependence ();
2151 *last_conflicts = integer_zero_node;
2158 tau1 = CEIL (-i0, i1);
2159 tau2 = FLOOR_DIV (niter - i0, i1);
2163 int last_conflict, min_multiple;
2164 tau1 = MAX (tau1, CEIL (-j0, j1));
2165 tau2 = MIN (tau2, FLOOR_DIV (niter - j0, j1));
2167 x0 = i1 * tau1 + i0;
2168 y0 = j1 * tau1 + j0;
2170 /* At this point (x0, y0) is one of the
2171 solutions to the Diophantine equation. The
2172 next step has to compute the smallest
2173 positive solution: the first conflicts. */
2174 min_multiple = MIN (x0 / i1, y0 / j1);
2175 x0 -= i1 * min_multiple;
2176 y0 -= j1 * min_multiple;
2178 tau1 = (x0 - i0)/i1;
2179 last_conflict = tau2 - tau1;
2181 /* If the overlap occurs outside of the bounds of the
2182 loop, there is no dependence. */
2183 if (x0 > niter || y0 > niter)
2185 *overlaps_a = conflict_fn_no_dependence ();
2186 *overlaps_b = conflict_fn_no_dependence ();
2187 *last_conflicts = integer_zero_node;
2193 affine_fn_univar (build_int_cst (NULL_TREE, x0),
2195 build_int_cst (NULL_TREE, i1)));
2198 affine_fn_univar (build_int_cst (NULL_TREE, y0),
2200 build_int_cst (NULL_TREE, j1)));
2201 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2206 /* FIXME: For the moment, the upper bound of the
2207 iteration domain for j is not checked. */
2208 if (dump_file && (dump_flags & TDF_DETAILS))
2209 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2210 *overlaps_a = conflict_fn_not_known ();
2211 *overlaps_b = conflict_fn_not_known ();
2212 *last_conflicts = chrec_dont_know;
2218 /* FIXME: For the moment, the upper bound of the
2219 iteration domain for i is not checked. */
2220 if (dump_file && (dump_flags & TDF_DETAILS))
2221 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2222 *overlaps_a = conflict_fn_not_known ();
2223 *overlaps_b = conflict_fn_not_known ();
2224 *last_conflicts = chrec_dont_know;
2230 if (dump_file && (dump_flags & TDF_DETAILS))
2231 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2232 *overlaps_a = conflict_fn_not_known ();
2233 *overlaps_b = conflict_fn_not_known ();
2234 *last_conflicts = chrec_dont_know;
2240 if (dump_file && (dump_flags & TDF_DETAILS))
2241 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2242 *overlaps_a = conflict_fn_not_known ();
2243 *overlaps_b = conflict_fn_not_known ();
2244 *last_conflicts = chrec_dont_know;
2247 end_analyze_subs_aa:
2248 if (dump_file && (dump_flags & TDF_DETAILS))
2250 fprintf (dump_file, " (overlaps_a = ");
2251 dump_conflict_function (dump_file, *overlaps_a);
2252 fprintf (dump_file, ")\n (overlaps_b = ");
2253 dump_conflict_function (dump_file, *overlaps_b);
2254 fprintf (dump_file, ")\n");
2255 fprintf (dump_file, ")\n");
2259 /* Returns true when analyze_subscript_affine_affine can be used for
2260 determining the dependence relation between chrec_a and chrec_b,
2261 that contain symbols. This function modifies chrec_a and chrec_b
2262 such that the analysis result is the same, and such that they don't
2263 contain symbols, and then can safely be passed to the analyzer.
2265 Example: The analysis of the following tuples of evolutions produce
2266 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2269 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2270 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2274 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2276 tree diff, type, left_a, left_b, right_b;
2278 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2279 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2280 /* FIXME: For the moment not handled. Might be refined later. */
2283 type = chrec_type (*chrec_a);
2284 left_a = CHREC_LEFT (*chrec_a);
2285 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL_TREE);
2286 diff = chrec_fold_minus (type, left_a, left_b);
2288 if (!evolution_function_is_constant_p (diff))
2291 if (dump_file && (dump_flags & TDF_DETAILS))
2292 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2294 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2295 diff, CHREC_RIGHT (*chrec_a));
2296 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL_TREE);
2297 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2298 build_int_cst (type, 0),
2303 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2304 *OVERLAPS_B are initialized to the functions that describe the
2305 relation between the elements accessed twice by CHREC_A and
2306 CHREC_B. For k >= 0, the following property is verified:
2308 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2311 analyze_siv_subscript (tree chrec_a,
2313 conflict_function **overlaps_a,
2314 conflict_function **overlaps_b,
2315 tree *last_conflicts)
2317 dependence_stats.num_siv++;
2319 if (dump_file && (dump_flags & TDF_DETAILS))
2320 fprintf (dump_file, "(analyze_siv_subscript \n");
2322 if (evolution_function_is_constant_p (chrec_a)
2323 && evolution_function_is_affine_p (chrec_b))
2324 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2325 overlaps_a, overlaps_b, last_conflicts);
2327 else if (evolution_function_is_affine_p (chrec_a)
2328 && evolution_function_is_constant_p (chrec_b))
2329 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2330 overlaps_b, overlaps_a, last_conflicts);
2332 else if (evolution_function_is_affine_p (chrec_a)
2333 && evolution_function_is_affine_p (chrec_b))
2335 if (!chrec_contains_symbols (chrec_a)
2336 && !chrec_contains_symbols (chrec_b))
2338 analyze_subscript_affine_affine (chrec_a, chrec_b,
2339 overlaps_a, overlaps_b,
2342 if (CF_NOT_KNOWN_P (*overlaps_a)
2343 || CF_NOT_KNOWN_P (*overlaps_b))
2344 dependence_stats.num_siv_unimplemented++;
2345 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2346 || CF_NO_DEPENDENCE_P (*overlaps_b))
2347 dependence_stats.num_siv_independent++;
2349 dependence_stats.num_siv_dependent++;
2351 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2354 analyze_subscript_affine_affine (chrec_a, chrec_b,
2355 overlaps_a, overlaps_b,
2357 /* FIXME: The number of iterations is a symbolic expression.
2358 Compute it properly. */
2359 *last_conflicts = chrec_dont_know;
2361 if (CF_NOT_KNOWN_P (*overlaps_a)
2362 || CF_NOT_KNOWN_P (*overlaps_b))
2363 dependence_stats.num_siv_unimplemented++;
2364 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2365 || CF_NO_DEPENDENCE_P (*overlaps_b))
2366 dependence_stats.num_siv_independent++;
2368 dependence_stats.num_siv_dependent++;
2371 goto siv_subscript_dontknow;
2376 siv_subscript_dontknow:;
2377 if (dump_file && (dump_flags & TDF_DETAILS))
2378 fprintf (dump_file, "siv test failed: unimplemented.\n");
2379 *overlaps_a = conflict_fn_not_known ();
2380 *overlaps_b = conflict_fn_not_known ();
2381 *last_conflicts = chrec_dont_know;
2382 dependence_stats.num_siv_unimplemented++;
2385 if (dump_file && (dump_flags & TDF_DETAILS))
2386 fprintf (dump_file, ")\n");
2389 /* Returns false if we can prove that the greatest common divisor of the steps
2390 of CHREC does not divide CST, false otherwise. */
2393 gcd_of_steps_may_divide_p (tree chrec, tree cst)
2395 HOST_WIDE_INT cd = 0, val;
2398 if (!host_integerp (cst, 0))
2400 val = tree_low_cst (cst, 0);
2402 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2404 step = CHREC_RIGHT (chrec);
2405 if (!host_integerp (step, 0))
2407 cd = gcd (cd, tree_low_cst (step, 0));
2408 chrec = CHREC_LEFT (chrec);
2411 return val % cd == 0;
2414 /* Analyze a MIV (Multiple Index Variable) subscript. *OVERLAPS_A and
2415 *OVERLAPS_B are initialized to the functions that describe the
2416 relation between the elements accessed twice by CHREC_A and
2417 CHREC_B. For k >= 0, the following property is verified:
2419 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2422 analyze_miv_subscript (tree chrec_a,
2424 conflict_function **overlaps_a,
2425 conflict_function **overlaps_b,
2426 tree *last_conflicts)
2428 /* FIXME: This is a MIV subscript, not yet handled.
2429 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2432 In the SIV test we had to solve a Diophantine equation with two
2433 variables. In the MIV case we have to solve a Diophantine
2434 equation with 2*n variables (if the subscript uses n IVs).
2437 dependence_stats.num_miv++;
2438 if (dump_file && (dump_flags & TDF_DETAILS))
2439 fprintf (dump_file, "(analyze_miv_subscript \n");
2441 chrec_a = chrec_convert (integer_type_node, chrec_a, NULL_TREE);
2442 chrec_b = chrec_convert (integer_type_node, chrec_b, NULL_TREE);
2443 difference = chrec_fold_minus (integer_type_node, chrec_a, chrec_b);
2445 if (eq_evolutions_p (chrec_a, chrec_b))
2447 /* Access functions are the same: all the elements are accessed
2448 in the same order. */
2449 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2450 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2451 *last_conflicts = estimated_loop_iterations_tree
2452 (get_chrec_loop (chrec_a), true);
2453 dependence_stats.num_miv_dependent++;
2456 else if (evolution_function_is_constant_p (difference)
2457 /* For the moment, the following is verified:
2458 evolution_function_is_affine_multivariate_p (chrec_a, 0) */
2459 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2461 /* testsuite/.../ssa-chrec-33.c
2462 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2464 The difference is 1, and all the evolution steps are multiples
2465 of 2, consequently there are no overlapping elements. */
2466 *overlaps_a = conflict_fn_no_dependence ();
2467 *overlaps_b = conflict_fn_no_dependence ();
2468 *last_conflicts = integer_zero_node;
2469 dependence_stats.num_miv_independent++;
2472 else if (evolution_function_is_affine_multivariate_p (chrec_a, 0)
2473 && !chrec_contains_symbols (chrec_a)
2474 && evolution_function_is_affine_multivariate_p (chrec_b, 0)
2475 && !chrec_contains_symbols (chrec_b))
2477 /* testsuite/.../ssa-chrec-35.c
2478 {0, +, 1}_2 vs. {0, +, 1}_3
2479 the overlapping elements are respectively located at iterations:
2480 {0, +, 1}_x and {0, +, 1}_x,
2481 in other words, we have the equality:
2482 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2485 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2486 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2488 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2489 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2491 analyze_subscript_affine_affine (chrec_a, chrec_b,
2492 overlaps_a, overlaps_b, last_conflicts);
2494 if (CF_NOT_KNOWN_P (*overlaps_a)
2495 || CF_NOT_KNOWN_P (*overlaps_b))
2496 dependence_stats.num_miv_unimplemented++;
2497 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2498 || CF_NO_DEPENDENCE_P (*overlaps_b))
2499 dependence_stats.num_miv_independent++;
2501 dependence_stats.num_miv_dependent++;
2506 /* When the analysis is too difficult, answer "don't know". */
2507 if (dump_file && (dump_flags & TDF_DETAILS))
2508 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2510 *overlaps_a = conflict_fn_not_known ();
2511 *overlaps_b = conflict_fn_not_known ();
2512 *last_conflicts = chrec_dont_know;
2513 dependence_stats.num_miv_unimplemented++;
2516 if (dump_file && (dump_flags & TDF_DETAILS))
2517 fprintf (dump_file, ")\n");
2520 /* Determines the iterations for which CHREC_A is equal to CHREC_B.
2521 OVERLAP_ITERATIONS_A and OVERLAP_ITERATIONS_B are initialized with
2522 two functions that describe the iterations that contain conflicting
2525 Remark: For an integer k >= 0, the following equality is true:
2527 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2531 analyze_overlapping_iterations (tree chrec_a,
2533 conflict_function **overlap_iterations_a,
2534 conflict_function **overlap_iterations_b,
2535 tree *last_conflicts)
2537 dependence_stats.num_subscript_tests++;
2539 if (dump_file && (dump_flags & TDF_DETAILS))
2541 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2542 fprintf (dump_file, " (chrec_a = ");
2543 print_generic_expr (dump_file, chrec_a, 0);
2544 fprintf (dump_file, ")\n (chrec_b = ");
2545 print_generic_expr (dump_file, chrec_b, 0);
2546 fprintf (dump_file, ")\n");
2549 if (chrec_a == NULL_TREE
2550 || chrec_b == NULL_TREE
2551 || chrec_contains_undetermined (chrec_a)
2552 || chrec_contains_undetermined (chrec_b))
2554 dependence_stats.num_subscript_undetermined++;
2556 *overlap_iterations_a = conflict_fn_not_known ();
2557 *overlap_iterations_b = conflict_fn_not_known ();
2560 /* If they are the same chrec, and are affine, they overlap
2561 on every iteration. */
2562 else if (eq_evolutions_p (chrec_a, chrec_b)
2563 && evolution_function_is_affine_multivariate_p (chrec_a, 0))
2565 dependence_stats.num_same_subscript_function++;
2566 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2567 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2568 *last_conflicts = chrec_dont_know;
2571 /* If they aren't the same, and aren't affine, we can't do anything
2573 else if ((chrec_contains_symbols (chrec_a)
2574 || chrec_contains_symbols (chrec_b))
2575 && (!evolution_function_is_affine_multivariate_p (chrec_a, 0)
2576 || !evolution_function_is_affine_multivariate_p (chrec_b, 0)))
2578 dependence_stats.num_subscript_undetermined++;
2579 *overlap_iterations_a = conflict_fn_not_known ();
2580 *overlap_iterations_b = conflict_fn_not_known ();
2583 else if (ziv_subscript_p (chrec_a, chrec_b))
2584 analyze_ziv_subscript (chrec_a, chrec_b,
2585 overlap_iterations_a, overlap_iterations_b,
2588 else if (siv_subscript_p (chrec_a, chrec_b))
2589 analyze_siv_subscript (chrec_a, chrec_b,
2590 overlap_iterations_a, overlap_iterations_b,
2594 analyze_miv_subscript (chrec_a, chrec_b,
2595 overlap_iterations_a, overlap_iterations_b,
2598 if (dump_file && (dump_flags & TDF_DETAILS))
2600 fprintf (dump_file, " (overlap_iterations_a = ");
2601 dump_conflict_function (dump_file, *overlap_iterations_a);
2602 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2603 dump_conflict_function (dump_file, *overlap_iterations_b);
2604 fprintf (dump_file, ")\n");
2605 fprintf (dump_file, ")\n");
2609 /* Helper function for uniquely inserting distance vectors. */
2612 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2617 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2618 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2621 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2624 /* Helper function for uniquely inserting direction vectors. */
2627 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2632 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2633 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2636 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2639 /* Add a distance of 1 on all the loops outer than INDEX. If we
2640 haven't yet determined a distance for this outer loop, push a new
2641 distance vector composed of the previous distance, and a distance
2642 of 1 for this outer loop. Example:
2650 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2651 save (0, 1), then we have to save (1, 0). */
2654 add_outer_distances (struct data_dependence_relation *ddr,
2655 lambda_vector dist_v, int index)
2657 /* For each outer loop where init_v is not set, the accesses are
2658 in dependence of distance 1 in the loop. */
2659 while (--index >= 0)
2661 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2662 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2664 save_dist_v (ddr, save_v);
2668 /* Return false when fail to represent the data dependence as a
2669 distance vector. INIT_B is set to true when a component has been
2670 added to the distance vector DIST_V. INDEX_CARRY is then set to
2671 the index in DIST_V that carries the dependence. */
2674 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2675 struct data_reference *ddr_a,
2676 struct data_reference *ddr_b,
2677 lambda_vector dist_v, bool *init_b,
2681 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2683 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2685 tree access_fn_a, access_fn_b;
2686 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2688 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2690 non_affine_dependence_relation (ddr);
2694 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2695 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2697 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2698 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2701 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2702 DDR_LOOP_NEST (ddr));
2703 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2704 DDR_LOOP_NEST (ddr));
2706 /* The dependence is carried by the outermost loop. Example:
2713 In this case, the dependence is carried by loop_1. */
2714 index = index_a < index_b ? index_a : index_b;
2715 *index_carry = MIN (index, *index_carry);
2717 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2719 non_affine_dependence_relation (ddr);
2723 dist = int_cst_value (SUB_DISTANCE (subscript));
2725 /* This is the subscript coupling test. If we have already
2726 recorded a distance for this loop (a distance coming from
2727 another subscript), it should be the same. For example,
2728 in the following code, there is no dependence:
2735 if (init_v[index] != 0 && dist_v[index] != dist)
2737 finalize_ddr_dependent (ddr, chrec_known);
2741 dist_v[index] = dist;
2745 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2747 /* This can be for example an affine vs. constant dependence
2748 (T[i] vs. T[3]) that is not an affine dependence and is
2749 not representable as a distance vector. */
2750 non_affine_dependence_relation (ddr);
2758 /* Return true when the DDR contains two data references that have the
2759 same access functions. */
2762 same_access_functions (struct data_dependence_relation *ddr)
2766 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2767 if (!eq_evolutions_p (DR_ACCESS_FN (DDR_A (ddr), i),
2768 DR_ACCESS_FN (DDR_B (ddr), i)))
2774 /* Return true when the DDR contains only constant access functions. */
2777 constant_access_functions (struct data_dependence_relation *ddr)
2781 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2782 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2783 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2790 /* Helper function for the case where DDR_A and DDR_B are the same
2791 multivariate access function. */
2794 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2797 tree c_1 = CHREC_LEFT (c_2);
2798 tree c_0 = CHREC_LEFT (c_1);
2799 lambda_vector dist_v;
2802 /* Polynomials with more than 2 variables are not handled yet. */
2803 if (TREE_CODE (c_0) != INTEGER_CST)
2805 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2809 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2810 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2812 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2813 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2814 v1 = int_cst_value (CHREC_RIGHT (c_1));
2815 v2 = int_cst_value (CHREC_RIGHT (c_2));
2828 save_dist_v (ddr, dist_v);
2830 add_outer_distances (ddr, dist_v, x_1);
2833 /* Helper function for the case where DDR_A and DDR_B are the same
2834 access functions. */
2837 add_other_self_distances (struct data_dependence_relation *ddr)
2839 lambda_vector dist_v;
2841 int index_carry = DDR_NB_LOOPS (ddr);
2843 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2845 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2847 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2849 if (!evolution_function_is_univariate_p (access_fun))
2851 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2853 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2857 add_multivariate_self_dist (ddr, DR_ACCESS_FN (DDR_A (ddr), 0));
2861 index_carry = MIN (index_carry,
2862 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2863 DDR_LOOP_NEST (ddr)));
2867 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2868 add_outer_distances (ddr, dist_v, index_carry);
2872 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2874 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2876 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2877 save_dist_v (ddr, dist_v);
2880 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2881 is the case for example when access functions are the same and
2882 equal to a constant, as in:
2889 in which case the distance vectors are (0) and (1). */
2892 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
2896 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2898 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
2899 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
2900 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
2902 for (j = 0; j < ca->n; j++)
2903 if (affine_function_zero_p (ca->fns[j]))
2905 insert_innermost_unit_dist_vector (ddr);
2909 for (j = 0; j < cb->n; j++)
2910 if (affine_function_zero_p (cb->fns[j]))
2912 insert_innermost_unit_dist_vector (ddr);
2918 /* Compute the classic per loop distance vector. DDR is the data
2919 dependence relation to build a vector from. Return false when fail
2920 to represent the data dependence as a distance vector. */
2923 build_classic_dist_vector (struct data_dependence_relation *ddr)
2925 bool init_b = false;
2926 int index_carry = DDR_NB_LOOPS (ddr);
2927 lambda_vector dist_v;
2929 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
2932 if (same_access_functions (ddr))
2934 /* Save the 0 vector. */
2935 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2936 save_dist_v (ddr, dist_v);
2938 if (constant_access_functions (ddr))
2939 add_distance_for_zero_overlaps (ddr);
2941 if (DDR_NB_LOOPS (ddr) > 1)
2942 add_other_self_distances (ddr);
2947 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2948 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
2949 dist_v, &init_b, &index_carry))
2952 /* Save the distance vector if we initialized one. */
2955 /* Verify a basic constraint: classic distance vectors should
2956 always be lexicographically positive.
2958 Data references are collected in the order of execution of
2959 the program, thus for the following loop
2961 | for (i = 1; i < 100; i++)
2962 | for (j = 1; j < 100; j++)
2964 | t = T[j+1][i-1]; // A
2965 | T[j][i] = t + 2; // B
2968 references are collected following the direction of the wind:
2969 A then B. The data dependence tests are performed also
2970 following this order, such that we're looking at the distance
2971 separating the elements accessed by A from the elements later
2972 accessed by B. But in this example, the distance returned by
2973 test_dep (A, B) is lexicographically negative (-1, 1), that
2974 means that the access A occurs later than B with respect to
2975 the outer loop, ie. we're actually looking upwind. In this
2976 case we solve test_dep (B, A) looking downwind to the
2977 lexicographically positive solution, that returns the
2978 distance vector (1, -1). */
2979 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
2981 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2982 subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr));
2983 compute_subscript_distance (ddr);
2984 build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
2985 save_v, &init_b, &index_carry);
2986 save_dist_v (ddr, save_v);
2988 /* In this case there is a dependence forward for all the
2991 | for (k = 1; k < 100; k++)
2992 | for (i = 1; i < 100; i++)
2993 | for (j = 1; j < 100; j++)
2995 | t = T[j+1][i-1]; // A
2996 | T[j][i] = t + 2; // B
3004 if (DDR_NB_LOOPS (ddr) > 1)
3006 add_outer_distances (ddr, save_v, index_carry);
3007 add_outer_distances (ddr, dist_v, index_carry);
3012 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3013 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3014 save_dist_v (ddr, save_v);
3016 if (DDR_NB_LOOPS (ddr) > 1)
3018 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3020 subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr));
3021 compute_subscript_distance (ddr);
3022 build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3023 opposite_v, &init_b, &index_carry);
3025 add_outer_distances (ddr, dist_v, index_carry);
3026 add_outer_distances (ddr, opposite_v, index_carry);
3032 /* There is a distance of 1 on all the outer loops: Example:
3033 there is a dependence of distance 1 on loop_1 for the array A.
3039 add_outer_distances (ddr, dist_v,
3040 lambda_vector_first_nz (dist_v,
3041 DDR_NB_LOOPS (ddr), 0));
3044 if (dump_file && (dump_flags & TDF_DETAILS))
3048 fprintf (dump_file, "(build_classic_dist_vector\n");
3049 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3051 fprintf (dump_file, " dist_vector = (");
3052 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3053 DDR_NB_LOOPS (ddr));
3054 fprintf (dump_file, " )\n");
3056 fprintf (dump_file, ")\n");
3062 /* Return the direction for a given distance.
3063 FIXME: Computing dir this way is suboptimal, since dir can catch
3064 cases that dist is unable to represent. */
3066 static inline enum data_dependence_direction
3067 dir_from_dist (int dist)
3070 return dir_positive;
3072 return dir_negative;
3077 /* Compute the classic per loop direction vector. DDR is the data
3078 dependence relation to build a vector from. */
3081 build_classic_dir_vector (struct data_dependence_relation *ddr)
3084 lambda_vector dist_v;
3086 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3088 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3090 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3091 dir_v[j] = dir_from_dist (dist_v[j]);
3093 save_dir_v (ddr, dir_v);
3097 /* Helper function. Returns true when there is a dependence between
3098 data references DRA and DRB. */
3101 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3102 struct data_reference *dra,
3103 struct data_reference *drb)
3106 tree last_conflicts;
3107 struct subscript *subscript;
3109 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3112 conflict_function *overlaps_a, *overlaps_b;
3114 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3115 DR_ACCESS_FN (drb, i),
3116 &overlaps_a, &overlaps_b,
3119 if (CF_NOT_KNOWN_P (overlaps_a)
3120 || CF_NOT_KNOWN_P (overlaps_b))
3122 finalize_ddr_dependent (ddr, chrec_dont_know);
3123 dependence_stats.num_dependence_undetermined++;
3124 free_conflict_function (overlaps_a);
3125 free_conflict_function (overlaps_b);
3129 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3130 || CF_NO_DEPENDENCE_P (overlaps_b))
3132 finalize_ddr_dependent (ddr, chrec_known);
3133 dependence_stats.num_dependence_independent++;
3134 free_conflict_function (overlaps_a);
3135 free_conflict_function (overlaps_b);
3141 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3142 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3143 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3150 /* Computes the conflicting iterations, and initialize DDR. */
3153 subscript_dependence_tester (struct data_dependence_relation *ddr)
3156 if (dump_file && (dump_flags & TDF_DETAILS))
3157 fprintf (dump_file, "(subscript_dependence_tester \n");
3159 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr)))
3160 dependence_stats.num_dependence_dependent++;
3162 compute_subscript_distance (ddr);
3163 if (build_classic_dist_vector (ddr))
3164 build_classic_dir_vector (ddr);
3166 if (dump_file && (dump_flags & TDF_DETAILS))
3167 fprintf (dump_file, ")\n");
3170 /* Returns true when all the access functions of A are affine or
3174 access_functions_are_affine_or_constant_p (struct data_reference *a)
3177 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3180 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3181 if (!evolution_function_is_constant_p (t)
3182 && !evolution_function_is_affine_multivariate_p (t, 0))
3188 /* Initializes an equation for an OMEGA problem using the information
3189 contained in the ACCESS_FUN. Returns true when the operation
3192 PB is the omega constraint system.
3193 EQ is the number of the equation to be initialized.
3194 OFFSET is used for shifting the variables names in the constraints:
3195 a constrain is composed of 2 * the number of variables surrounding
3196 dependence accesses. OFFSET is set either to 0 for the first n variables,
3197 then it is set to n.
3198 ACCESS_FUN is expected to be an affine chrec. */
3201 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3202 unsigned int offset, tree access_fun,
3203 struct data_dependence_relation *ddr)
3205 switch (TREE_CODE (access_fun))
3207 case POLYNOMIAL_CHREC:
3209 tree left = CHREC_LEFT (access_fun);
3210 tree right = CHREC_RIGHT (access_fun);
3211 int var = CHREC_VARIABLE (access_fun);
3214 if (TREE_CODE (right) != INTEGER_CST)
3217 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3218 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3220 /* Compute the innermost loop index. */
3221 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3224 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3225 += int_cst_value (right);
3227 switch (TREE_CODE (left))
3229 case POLYNOMIAL_CHREC:
3230 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3233 pb->eqs[eq].coef[0] += int_cst_value (left);
3242 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3250 /* As explained in the comments preceding init_omega_for_ddr, we have
3251 to set up a system for each loop level, setting outer loops
3252 variation to zero, and current loop variation to positive or zero.
3253 Save each lexico positive distance vector. */
3256 omega_extract_distance_vectors (omega_pb pb,
3257 struct data_dependence_relation *ddr)
3261 struct loop *loopi, *loopj;
3262 enum omega_result res;
3264 /* Set a new problem for each loop in the nest. The basis is the
3265 problem that we have initialized until now. On top of this we
3266 add new constraints. */
3267 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3268 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3271 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3272 DDR_NB_LOOPS (ddr));
3274 omega_copy_problem (copy, pb);
3276 /* For all the outer loops "loop_j", add "dj = 0". */
3278 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3280 eq = omega_add_zero_eq (copy, omega_black);
3281 copy->eqs[eq].coef[j + 1] = 1;
3284 /* For "loop_i", add "0 <= di". */
3285 geq = omega_add_zero_geq (copy, omega_black);
3286 copy->geqs[geq].coef[i + 1] = 1;
3288 /* Reduce the constraint system, and test that the current
3289 problem is feasible. */
3290 res = omega_simplify_problem (copy);
3291 if (res == omega_false
3292 || res == omega_unknown
3293 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3296 for (eq = 0; eq < copy->num_subs; eq++)
3297 if (copy->subs[eq].key == (int) i + 1)
3299 dist = copy->subs[eq].coef[0];
3305 /* Reinitialize problem... */
3306 omega_copy_problem (copy, pb);
3308 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3310 eq = omega_add_zero_eq (copy, omega_black);
3311 copy->eqs[eq].coef[j + 1] = 1;
3314 /* ..., but this time "di = 1". */
3315 eq = omega_add_zero_eq (copy, omega_black);
3316 copy->eqs[eq].coef[i + 1] = 1;
3317 copy->eqs[eq].coef[0] = -1;
3319 res = omega_simplify_problem (copy);
3320 if (res == omega_false
3321 || res == omega_unknown
3322 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3325 for (eq = 0; eq < copy->num_subs; eq++)
3326 if (copy->subs[eq].key == (int) i + 1)
3328 dist = copy->subs[eq].coef[0];
3334 /* Save the lexicographically positive distance vector. */
3337 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3338 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3342 for (eq = 0; eq < copy->num_subs; eq++)
3343 if (copy->subs[eq].key > 0)
3345 dist = copy->subs[eq].coef[0];
3346 dist_v[copy->subs[eq].key - 1] = dist;
3349 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3350 dir_v[j] = dir_from_dist (dist_v[j]);
3352 save_dist_v (ddr, dist_v);
3353 save_dir_v (ddr, dir_v);
3357 omega_free_problem (copy);
3361 /* This is called for each subscript of a tuple of data references:
3362 insert an equality for representing the conflicts. */
3365 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3366 struct data_dependence_relation *ddr,
3367 omega_pb pb, bool *maybe_dependent)
3370 tree fun_a = chrec_convert (integer_type_node, access_fun_a, NULL_TREE);
3371 tree fun_b = chrec_convert (integer_type_node, access_fun_b, NULL_TREE);
3372 tree difference = chrec_fold_minus (integer_type_node, fun_a, fun_b);
3374 /* When the fun_a - fun_b is not constant, the dependence is not
3375 captured by the classic distance vector representation. */
3376 if (TREE_CODE (difference) != INTEGER_CST)
3380 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3382 /* There is no dependence. */
3383 *maybe_dependent = false;
3387 fun_b = chrec_fold_multiply (integer_type_node, fun_b,
3388 integer_minus_one_node);
3390 eq = omega_add_zero_eq (pb, omega_black);
3391 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3392 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3393 /* There is probably a dependence, but the system of
3394 constraints cannot be built: answer "don't know". */
3398 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3399 && !int_divides_p (lambda_vector_gcd
3400 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3401 2 * DDR_NB_LOOPS (ddr)),
3402 pb->eqs[eq].coef[0]))
3404 /* There is no dependence. */
3405 *maybe_dependent = false;
3412 /* Helper function, same as init_omega_for_ddr but specialized for
3413 data references A and B. */
3416 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3417 struct data_dependence_relation *ddr,
3418 omega_pb pb, bool *maybe_dependent)
3423 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3425 /* Insert an equality per subscript. */
3426 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3428 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3429 ddr, pb, maybe_dependent))
3431 else if (*maybe_dependent == false)
3433 /* There is no dependence. */
3434 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3439 /* Insert inequalities: constraints corresponding to the iteration
3440 domain, i.e. the loops surrounding the references "loop_x" and
3441 the distance variables "dx". The layout of the OMEGA
3442 representation is as follows:
3443 - coef[0] is the constant
3444 - coef[1..nb_loops] are the protected variables that will not be
3445 removed by the solver: the "dx"
3446 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3448 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3449 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3451 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, true);
3454 ineq = omega_add_zero_geq (pb, omega_black);
3455 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3457 /* 0 <= loop_x + dx */
3458 ineq = omega_add_zero_geq (pb, omega_black);
3459 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3460 pb->geqs[ineq].coef[i + 1] = 1;
3464 /* loop_x <= nb_iters */
3465 ineq = omega_add_zero_geq (pb, omega_black);
3466 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3467 pb->geqs[ineq].coef[0] = nbi;
3469 /* loop_x + dx <= nb_iters */
3470 ineq = omega_add_zero_geq (pb, omega_black);
3471 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3472 pb->geqs[ineq].coef[i + 1] = -1;
3473 pb->geqs[ineq].coef[0] = nbi;
3475 /* A step "dx" bigger than nb_iters is not feasible, so
3476 add "0 <= nb_iters + dx", */
3477 ineq = omega_add_zero_geq (pb, omega_black);
3478 pb->geqs[ineq].coef[i + 1] = 1;
3479 pb->geqs[ineq].coef[0] = nbi;
3480 /* and "dx <= nb_iters". */
3481 ineq = omega_add_zero_geq (pb, omega_black);
3482 pb->geqs[ineq].coef[i + 1] = -1;
3483 pb->geqs[ineq].coef[0] = nbi;
3487 omega_extract_distance_vectors (pb, ddr);
3492 /* Sets up the Omega dependence problem for the data dependence
3493 relation DDR. Returns false when the constraint system cannot be
3494 built, ie. when the test answers "don't know". Returns true
3495 otherwise, and when independence has been proved (using one of the
3496 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3497 set MAYBE_DEPENDENT to true.
3499 Example: for setting up the dependence system corresponding to the
3500 conflicting accesses
3505 | ... A[2*j, 2*(i + j)]
3509 the following constraints come from the iteration domain:
3516 where di, dj are the distance variables. The constraints
3517 representing the conflicting elements are:
3520 i + 1 = 2 * (i + di + j + dj)
3522 For asking that the resulting distance vector (di, dj) be
3523 lexicographically positive, we insert the constraint "di >= 0". If
3524 "di = 0" in the solution, we fix that component to zero, and we
3525 look at the inner loops: we set a new problem where all the outer
3526 loop distances are zero, and fix this inner component to be
3527 positive. When one of the components is positive, we save that
3528 distance, and set a new problem where the distance on this loop is
3529 zero, searching for other distances in the inner loops. Here is
3530 the classic example that illustrates that we have to set for each
3531 inner loop a new problem:
3539 we have to save two distances (1, 0) and (0, 1).
3541 Given two array references, refA and refB, we have to set the
3542 dependence problem twice, refA vs. refB and refB vs. refA, and we
3543 cannot do a single test, as refB might occur before refA in the
3544 inner loops, and the contrary when considering outer loops: ex.
3549 | T[{1,+,1}_2][{1,+,1}_1] // refA
3550 | T[{2,+,1}_2][{0,+,1}_1] // refB
3555 refB touches the elements in T before refA, and thus for the same
3556 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3557 but for successive loop_0 iterations, we have (1, -1, 1)
3559 The Omega solver expects the distance variables ("di" in the
3560 previous example) to come first in the constraint system (as
3561 variables to be protected, or "safe" variables), the constraint
3562 system is built using the following layout:
3564 "cst | distance vars | index vars".
3568 init_omega_for_ddr (struct data_dependence_relation *ddr,
3569 bool *maybe_dependent)
3574 *maybe_dependent = true;
3576 if (same_access_functions (ddr))
3579 lambda_vector dir_v;
3581 /* Save the 0 vector. */
3582 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3583 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3584 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3585 dir_v[j] = dir_equal;
3586 save_dir_v (ddr, dir_v);
3588 /* Save the dependences carried by outer loops. */
3589 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3590 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3592 omega_free_problem (pb);
3596 /* Omega expects the protected variables (those that have to be kept
3597 after elimination) to appear first in the constraint system.
3598 These variables are the distance variables. In the following
3599 initialization we declare NB_LOOPS safe variables, and the total
3600 number of variables for the constraint system is 2*NB_LOOPS. */
3601 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3602 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3604 omega_free_problem (pb);
3606 /* Stop computation if not decidable, or no dependence. */
3607 if (res == false || *maybe_dependent == false)
3610 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3611 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3613 omega_free_problem (pb);
3618 /* Return true when DDR contains the same information as that stored
3619 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3622 ddr_consistent_p (FILE *file,
3623 struct data_dependence_relation *ddr,
3624 VEC (lambda_vector, heap) *dist_vects,
3625 VEC (lambda_vector, heap) *dir_vects)
3629 /* If dump_file is set, output there. */
3630 if (dump_file && (dump_flags & TDF_DETAILS))
3633 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3635 lambda_vector b_dist_v;
3636 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3637 VEC_length (lambda_vector, dist_vects),
3638 DDR_NUM_DIST_VECTS (ddr));
3640 fprintf (file, "Banerjee dist vectors:\n");
3641 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3642 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3644 fprintf (file, "Omega dist vectors:\n");
3645 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3646 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3648 fprintf (file, "data dependence relation:\n");
3649 dump_data_dependence_relation (file, ddr);
3651 fprintf (file, ")\n");
3655 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3657 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3658 VEC_length (lambda_vector, dir_vects),
3659 DDR_NUM_DIR_VECTS (ddr));
3663 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3665 lambda_vector a_dist_v;
3666 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3668 /* Distance vectors are not ordered in the same way in the DDR
3669 and in the DIST_VECTS: search for a matching vector. */
3670 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3671 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3674 if (j == VEC_length (lambda_vector, dist_vects))
3676 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3677 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3678 fprintf (file, "not found in Omega dist vectors:\n");
3679 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3680 fprintf (file, "data dependence relation:\n");
3681 dump_data_dependence_relation (file, ddr);
3682 fprintf (file, ")\n");
3686 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3688 lambda_vector a_dir_v;
3689 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3691 /* Direction vectors are not ordered in the same way in the DDR
3692 and in the DIR_VECTS: search for a matching vector. */
3693 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3694 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3697 if (j == VEC_length (lambda_vector, dist_vects))
3699 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3700 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3701 fprintf (file, "not found in Omega dir vectors:\n");
3702 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3703 fprintf (file, "data dependence relation:\n");
3704 dump_data_dependence_relation (file, ddr);
3705 fprintf (file, ")\n");
3712 /* This computes the affine dependence relation between A and B.
3713 CHREC_KNOWN is used for representing the independence between two
3714 accesses, while CHREC_DONT_KNOW is used for representing the unknown
3717 Note that it is possible to stop the computation of the dependence
3718 relation the first time we detect a CHREC_KNOWN element for a given
3722 compute_affine_dependence (struct data_dependence_relation *ddr)
3724 struct data_reference *dra = DDR_A (ddr);
3725 struct data_reference *drb = DDR_B (ddr);
3727 if (dump_file && (dump_flags & TDF_DETAILS))
3729 fprintf (dump_file, "(compute_affine_dependence\n");
3730 fprintf (dump_file, " (stmt_a = \n");
3731 print_generic_expr (dump_file, DR_STMT (dra), 0);
3732 fprintf (dump_file, ")\n (stmt_b = \n");
3733 print_generic_expr (dump_file, DR_STMT (drb), 0);
3734 fprintf (dump_file, ")\n");
3737 /* Analyze only when the dependence relation is not yet known. */
3738 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3740 dependence_stats.num_dependence_tests++;
3742 if (access_functions_are_affine_or_constant_p (dra)
3743 && access_functions_are_affine_or_constant_p (drb))
3745 if (flag_check_data_deps)
3747 /* Compute the dependences using the first algorithm. */
3748 subscript_dependence_tester (ddr);
3750 if (dump_file && (dump_flags & TDF_DETAILS))
3752 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3753 dump_data_dependence_relation (dump_file, ddr);
3756 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3758 bool maybe_dependent;
3759 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3761 /* Save the result of the first DD analyzer. */
3762 dist_vects = DDR_DIST_VECTS (ddr);
3763 dir_vects = DDR_DIR_VECTS (ddr);
3765 /* Reset the information. */
3766 DDR_DIST_VECTS (ddr) = NULL;
3767 DDR_DIR_VECTS (ddr) = NULL;
3769 /* Compute the same information using Omega. */
3770 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3771 goto csys_dont_know;
3773 if (dump_file && (dump_flags & TDF_DETAILS))
3775 fprintf (dump_file, "Omega Analyzer\n");
3776 dump_data_dependence_relation (dump_file, ddr);
3779 /* Check that we get the same information. */
3780 if (maybe_dependent)
3781 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3786 subscript_dependence_tester (ddr);
3789 /* As a last case, if the dependence cannot be determined, or if
3790 the dependence is considered too difficult to determine, answer
3795 dependence_stats.num_dependence_undetermined++;
3797 if (dump_file && (dump_flags & TDF_DETAILS))
3799 fprintf (dump_file, "Data ref a:\n");
3800 dump_data_reference (dump_file, dra);
3801 fprintf (dump_file, "Data ref b:\n");
3802 dump_data_reference (dump_file, drb);
3803 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3805 finalize_ddr_dependent (ddr, chrec_dont_know);
3809 if (dump_file && (dump_flags & TDF_DETAILS))
3810 fprintf (dump_file, ")\n");
3813 /* This computes the dependence relation for the same data
3814 reference into DDR. */
3817 compute_self_dependence (struct data_dependence_relation *ddr)
3820 struct subscript *subscript;
3822 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3825 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3828 /* The accessed index overlaps for each iteration. */
3829 SUB_CONFLICTS_IN_A (subscript)
3830 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3831 SUB_CONFLICTS_IN_B (subscript)
3832 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3833 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3836 /* The distance vector is the zero vector. */
3837 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3838 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3841 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3842 the data references in DATAREFS, in the LOOP_NEST. When
3843 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
3847 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
3848 VEC (ddr_p, heap) **dependence_relations,
3849 VEC (loop_p, heap) *loop_nest,
3850 bool compute_self_and_rr)
3852 struct data_dependence_relation *ddr;
3853 struct data_reference *a, *b;
3856 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3857 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
3858 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
3860 ddr = initialize_data_dependence_relation (a, b, loop_nest);
3861 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3862 compute_affine_dependence (ddr);
3865 if (compute_self_and_rr)
3866 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
3868 ddr = initialize_data_dependence_relation (a, a, loop_nest);
3869 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
3870 compute_self_dependence (ddr);
3874 /* Stores the locations of memory references in STMT to REFERENCES. Returns
3875 true if STMT clobbers memory, false otherwise. */
3878 get_references_in_stmt (tree stmt, VEC (data_ref_loc, heap) **references)
3880 bool clobbers_memory = false;
3882 tree *op0, *op1, call;
3886 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
3887 Calls have side-effects, except those to const or pure
3889 call = get_call_expr_in (stmt);
3891 && !(call_expr_flags (call) & (ECF_CONST | ECF_PURE)))
3892 || (TREE_CODE (stmt) == ASM_EXPR
3893 && ASM_VOLATILE_P (stmt)))
3894 clobbers_memory = true;
3896 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
3897 return clobbers_memory;
3899 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
3901 op0 = &GIMPLE_STMT_OPERAND (stmt, 0);
3902 op1 = &GIMPLE_STMT_OPERAND (stmt, 1);
3905 || REFERENCE_CLASS_P (*op1))
3907 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3909 ref->is_read = true;
3913 || REFERENCE_CLASS_P (*op0))
3915 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3917 ref->is_read = false;
3923 unsigned i, n = call_expr_nargs (call);
3925 for (i = 0; i < n; i++)
3927 op0 = &CALL_EXPR_ARG (call, i);
3930 || REFERENCE_CLASS_P (*op0))
3932 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
3934 ref->is_read = true;
3939 return clobbers_memory;
3942 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
3943 reference, returns false, otherwise returns true. NEST is the outermost
3944 loop of the loop nest in that the references should be analysed. */
3947 find_data_references_in_stmt (struct loop *nest, tree stmt,
3948 VEC (data_reference_p, heap) **datarefs)
3951 VEC (data_ref_loc, heap) *references;
3954 data_reference_p dr;
3956 if (get_references_in_stmt (stmt, &references))
3958 VEC_free (data_ref_loc, heap, references);
3962 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
3964 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
3965 gcc_assert (dr != NULL);
3967 /* FIXME -- data dependence analysis does not work correctly for objects with
3968 invariant addresses. Let us fail here until the problem is fixed. */
3969 if (dr_address_invariant_p (dr))
3972 if (dump_file && (dump_flags & TDF_DETAILS))
3973 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
3978 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
3980 VEC_free (data_ref_loc, heap, references);
3984 /* Search the data references in LOOP, and record the information into
3985 DATAREFS. Returns chrec_dont_know when failing to analyze a
3986 difficult case, returns NULL_TREE otherwise.
3988 TODO: This function should be made smarter so that it can handle address
3989 arithmetic as if they were array accesses, etc. */
3992 find_data_references_in_loop (struct loop *loop,
3993 VEC (data_reference_p, heap) **datarefs)
3995 basic_block bb, *bbs;
3997 block_stmt_iterator bsi;
3999 bbs = get_loop_body_in_dom_order (loop);
4001 for (i = 0; i < loop->num_nodes; i++)
4005 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
4007 tree stmt = bsi_stmt (bsi);
4009 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4011 struct data_reference *res;
4012 res = XCNEW (struct data_reference);
4013 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4016 return chrec_dont_know;
4025 /* Recursive helper function. */
4028 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4030 /* Inner loops of the nest should not contain siblings. Example:
4031 when there are two consecutive loops,
4042 the dependence relation cannot be captured by the distance
4047 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4049 return find_loop_nest_1 (loop->inner, loop_nest);
4053 /* Return false when the LOOP is not well nested. Otherwise return
4054 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4055 contain the loops from the outermost to the innermost, as they will
4056 appear in the classic distance vector. */
4059 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4061 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4063 return find_loop_nest_1 (loop->inner, loop_nest);
4067 /* Given a loop nest LOOP, the following vectors are returned:
4068 DATAREFS is initialized to all the array elements contained in this loop,
4069 DEPENDENCE_RELATIONS contains the relations between the data references.
4070 Compute read-read and self relations if
4071 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4074 compute_data_dependences_for_loop (struct loop *loop,
4075 bool compute_self_and_read_read_dependences,
4076 VEC (data_reference_p, heap) **datarefs,
4077 VEC (ddr_p, heap) **dependence_relations)
4079 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4081 memset (&dependence_stats, 0, sizeof (dependence_stats));
4083 /* If the loop nest is not well formed, or one of the data references
4084 is not computable, give up without spending time to compute other
4087 || !find_loop_nest (loop, &vloops)
4088 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4090 struct data_dependence_relation *ddr;
4092 /* Insert a single relation into dependence_relations:
4094 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4095 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4098 compute_all_dependences (*datarefs, dependence_relations, vloops,
4099 compute_self_and_read_read_dependences);
4101 if (dump_file && (dump_flags & TDF_STATS))
4103 fprintf (dump_file, "Dependence tester statistics:\n");
4105 fprintf (dump_file, "Number of dependence tests: %d\n",
4106 dependence_stats.num_dependence_tests);
4107 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4108 dependence_stats.num_dependence_dependent);
4109 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4110 dependence_stats.num_dependence_independent);
4111 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4112 dependence_stats.num_dependence_undetermined);
4114 fprintf (dump_file, "Number of subscript tests: %d\n",
4115 dependence_stats.num_subscript_tests);
4116 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4117 dependence_stats.num_subscript_undetermined);
4118 fprintf (dump_file, "Number of same subscript function: %d\n",
4119 dependence_stats.num_same_subscript_function);
4121 fprintf (dump_file, "Number of ziv tests: %d\n",
4122 dependence_stats.num_ziv);
4123 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4124 dependence_stats.num_ziv_dependent);
4125 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4126 dependence_stats.num_ziv_independent);
4127 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4128 dependence_stats.num_ziv_unimplemented);
4130 fprintf (dump_file, "Number of siv tests: %d\n",
4131 dependence_stats.num_siv);
4132 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4133 dependence_stats.num_siv_dependent);
4134 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4135 dependence_stats.num_siv_independent);
4136 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4137 dependence_stats.num_siv_unimplemented);
4139 fprintf (dump_file, "Number of miv tests: %d\n",
4140 dependence_stats.num_miv);
4141 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4142 dependence_stats.num_miv_dependent);
4143 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4144 dependence_stats.num_miv_independent);
4145 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4146 dependence_stats.num_miv_unimplemented);
4150 /* Entry point (for testing only). Analyze all the data references
4151 and the dependence relations in LOOP.
4153 The data references are computed first.
4155 A relation on these nodes is represented by a complete graph. Some
4156 of the relations could be of no interest, thus the relations can be
4159 In the following function we compute all the relations. This is
4160 just a first implementation that is here for:
4161 - for showing how to ask for the dependence relations,
4162 - for the debugging the whole dependence graph,
4163 - for the dejagnu testcases and maintenance.
4165 It is possible to ask only for a part of the graph, avoiding to
4166 compute the whole dependence graph. The computed dependences are
4167 stored in a knowledge base (KB) such that later queries don't
4168 recompute the same information. The implementation of this KB is
4169 transparent to the optimizer, and thus the KB can be changed with a
4170 more efficient implementation, or the KB could be disabled. */
4172 analyze_all_data_dependences (struct loop *loop)
4175 int nb_data_refs = 10;
4176 VEC (data_reference_p, heap) *datarefs =
4177 VEC_alloc (data_reference_p, heap, nb_data_refs);
4178 VEC (ddr_p, heap) *dependence_relations =
4179 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4181 /* Compute DDs on the whole function. */
4182 compute_data_dependences_for_loop (loop, false, &datarefs,
4183 &dependence_relations);
4187 dump_data_dependence_relations (dump_file, dependence_relations);
4188 fprintf (dump_file, "\n\n");
4190 if (dump_flags & TDF_DETAILS)
4191 dump_dist_dir_vectors (dump_file, dependence_relations);
4193 if (dump_flags & TDF_STATS)
4195 unsigned nb_top_relations = 0;
4196 unsigned nb_bot_relations = 0;
4197 unsigned nb_basename_differ = 0;
4198 unsigned nb_chrec_relations = 0;
4199 struct data_dependence_relation *ddr;
4201 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4203 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4206 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4208 struct data_reference *a = DDR_A (ddr);
4209 struct data_reference *b = DDR_B (ddr);
4211 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
4212 nb_basename_differ++;
4218 nb_chrec_relations++;
4221 gather_stats_on_scev_database ();
4225 free_dependence_relations (dependence_relations);
4226 free_data_refs (datarefs);
4229 /* Computes all the data dependences and check that the results of
4230 several analyzers are the same. */
4233 tree_check_data_deps (void)
4236 struct loop *loop_nest;
4238 FOR_EACH_LOOP (li, loop_nest, 0)
4239 analyze_all_data_dependences (loop_nest);
4242 /* Free the memory used by a data dependence relation DDR. */
4245 free_dependence_relation (struct data_dependence_relation *ddr)
4250 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_SUBSCRIPTS (ddr))
4251 free_subscripts (DDR_SUBSCRIPTS (ddr));
4256 /* Free the memory used by the data dependence relations from
4257 DEPENDENCE_RELATIONS. */
4260 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4263 struct data_dependence_relation *ddr;
4264 VEC (loop_p, heap) *loop_nest = NULL;
4266 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4270 if (loop_nest == NULL)
4271 loop_nest = DDR_LOOP_NEST (ddr);
4273 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4274 || DDR_LOOP_NEST (ddr) == loop_nest);
4275 free_dependence_relation (ddr);
4279 VEC_free (loop_p, heap, loop_nest);
4280 VEC_free (ddr_p, heap, dependence_relations);
4283 /* Free the memory used by the data references from DATAREFS. */
4286 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4289 struct data_reference *dr;
4291 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4293 VEC_free (data_reference_p, heap, datarefs);