1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
50 - to define an interface to access this data.
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
63 has an integer solution x = 1 and y = -1.
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
89 static struct datadep_stats
91 int num_dependence_tests;
92 int num_dependence_dependent;
93 int num_dependence_independent;
94 int num_dependence_undetermined;
96 int num_subscript_tests;
97 int num_subscript_undetermined;
98 int num_same_subscript_function;
101 int num_ziv_independent;
102 int num_ziv_dependent;
103 int num_ziv_unimplemented;
106 int num_siv_independent;
107 int num_siv_dependent;
108 int num_siv_unimplemented;
111 int num_miv_independent;
112 int num_miv_dependent;
113 int num_miv_unimplemented;
116 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
117 struct data_reference *,
118 struct data_reference *,
120 /* Returns true iff A divides B. */
123 tree_fold_divides_p (const_tree a, const_tree b)
125 gcc_assert (TREE_CODE (a) == INTEGER_CST);
126 gcc_assert (TREE_CODE (b) == INTEGER_CST);
127 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
130 /* Returns true iff A divides B. */
133 int_divides_p (int a, int b)
135 return ((b % a) == 0);
140 /* Dump into FILE all the data references from DATAREFS. */
143 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
146 struct data_reference *dr;
148 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
149 dump_data_reference (file, dr);
152 /* Dump into STDERR all the data references from DATAREFS. */
155 debug_data_references (VEC (data_reference_p, heap) *datarefs)
157 dump_data_references (stderr, datarefs);
160 /* Dump to STDERR all the dependence relations from DDRS. */
163 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
165 dump_data_dependence_relations (stderr, ddrs);
168 /* Dump into FILE all the dependence relations from DDRS. */
171 dump_data_dependence_relations (FILE *file,
172 VEC (ddr_p, heap) *ddrs)
175 struct data_dependence_relation *ddr;
177 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
178 dump_data_dependence_relation (file, ddr);
181 /* Print to STDERR the data_reference DR. */
184 debug_data_reference (struct data_reference *dr)
186 dump_data_reference (stderr, dr);
189 /* Dump function for a DATA_REFERENCE structure. */
192 dump_data_reference (FILE *outf,
193 struct data_reference *dr)
197 fprintf (outf, "#(Data Ref: \n");
198 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
199 fprintf (outf, "# stmt: ");
200 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
201 fprintf (outf, "# ref: ");
202 print_generic_stmt (outf, DR_REF (dr), 0);
203 fprintf (outf, "# base_object: ");
204 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
206 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
208 fprintf (outf, "# Access function %d: ", i);
209 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
211 fprintf (outf, "#)\n");
214 /* Dumps the affine function described by FN to the file OUTF. */
217 dump_affine_function (FILE *outf, affine_fn fn)
222 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
223 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
225 fprintf (outf, " + ");
226 print_generic_expr (outf, coef, TDF_SLIM);
227 fprintf (outf, " * x_%u", i);
231 /* Dumps the conflict function CF to the file OUTF. */
234 dump_conflict_function (FILE *outf, conflict_function *cf)
238 if (cf->n == NO_DEPENDENCE)
239 fprintf (outf, "no dependence\n");
240 else if (cf->n == NOT_KNOWN)
241 fprintf (outf, "not known\n");
244 for (i = 0; i < cf->n; i++)
247 dump_affine_function (outf, cf->fns[i]);
248 fprintf (outf, "]\n");
253 /* Dump function for a SUBSCRIPT structure. */
256 dump_subscript (FILE *outf, struct subscript *subscript)
258 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
260 fprintf (outf, "\n (subscript \n");
261 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
262 dump_conflict_function (outf, cf);
263 if (CF_NONTRIVIAL_P (cf))
265 tree last_iteration = SUB_LAST_CONFLICT (subscript);
266 fprintf (outf, " last_conflict: ");
267 print_generic_stmt (outf, last_iteration, 0);
270 cf = SUB_CONFLICTS_IN_B (subscript);
271 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
272 dump_conflict_function (outf, cf);
273 if (CF_NONTRIVIAL_P (cf))
275 tree last_iteration = SUB_LAST_CONFLICT (subscript);
276 fprintf (outf, " last_conflict: ");
277 print_generic_stmt (outf, last_iteration, 0);
280 fprintf (outf, " (Subscript distance: ");
281 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
282 fprintf (outf, " )\n");
283 fprintf (outf, " )\n");
286 /* Print the classic direction vector DIRV to OUTF. */
289 print_direction_vector (FILE *outf,
295 for (eq = 0; eq < length; eq++)
297 enum data_dependence_direction dir = ((enum data_dependence_direction)
303 fprintf (outf, " +");
306 fprintf (outf, " -");
309 fprintf (outf, " =");
311 case dir_positive_or_equal:
312 fprintf (outf, " +=");
314 case dir_positive_or_negative:
315 fprintf (outf, " +-");
317 case dir_negative_or_equal:
318 fprintf (outf, " -=");
321 fprintf (outf, " *");
324 fprintf (outf, "indep");
328 fprintf (outf, "\n");
331 /* Print a vector of direction vectors. */
334 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
340 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
341 print_direction_vector (outf, v, length);
344 /* Print out a vector VEC of length N to OUTFILE. */
347 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
351 for (i = 0; i < n; i++)
352 fprintf (outfile, "%3d ", vector[i]);
353 fprintf (outfile, "\n");
356 /* Print a vector of distance vectors. */
359 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
365 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
366 print_lambda_vector (outf, v, length);
372 debug_data_dependence_relation (struct data_dependence_relation *ddr)
374 dump_data_dependence_relation (stderr, ddr);
377 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
380 dump_data_dependence_relation (FILE *outf,
381 struct data_dependence_relation *ddr)
383 struct data_reference *dra, *drb;
385 fprintf (outf, "(Data Dep: \n");
387 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
394 dump_data_reference (outf, dra);
396 fprintf (outf, " (nil)\n");
398 dump_data_reference (outf, drb);
400 fprintf (outf, " (nil)\n");
402 fprintf (outf, " (don't know)\n)\n");
408 dump_data_reference (outf, dra);
409 dump_data_reference (outf, drb);
411 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
412 fprintf (outf, " (no dependence)\n");
414 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
419 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
421 fprintf (outf, " access_fn_A: ");
422 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
423 fprintf (outf, " access_fn_B: ");
424 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
425 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
428 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
429 fprintf (outf, " loop nest: (");
430 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
431 fprintf (outf, "%d ", loopi->num);
432 fprintf (outf, ")\n");
434 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
436 fprintf (outf, " distance_vector: ");
437 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
441 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
443 fprintf (outf, " direction_vector: ");
444 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
449 fprintf (outf, ")\n");
452 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
455 dump_data_dependence_direction (FILE *file,
456 enum data_dependence_direction dir)
472 case dir_positive_or_negative:
473 fprintf (file, "+-");
476 case dir_positive_or_equal:
477 fprintf (file, "+=");
480 case dir_negative_or_equal:
481 fprintf (file, "-=");
493 /* Dumps the distance and direction vectors in FILE. DDRS contains
494 the dependence relations, and VECT_SIZE is the size of the
495 dependence vectors, or in other words the number of loops in the
499 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
502 struct data_dependence_relation *ddr;
505 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
506 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
508 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
510 fprintf (file, "DISTANCE_V (");
511 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
512 fprintf (file, ")\n");
515 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
517 fprintf (file, "DIRECTION_V (");
518 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
519 fprintf (file, ")\n");
523 fprintf (file, "\n\n");
526 /* Dumps the data dependence relations DDRS in FILE. */
529 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
532 struct data_dependence_relation *ddr;
534 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
535 dump_data_dependence_relation (file, ddr);
537 fprintf (file, "\n\n");
540 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
541 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
542 constant of type ssizetype, and returns true. If we cannot do this
543 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
547 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
548 tree *var, tree *off)
552 enum tree_code ocode = code;
560 *var = build_int_cst (type, 0);
561 *off = fold_convert (ssizetype, op0);
564 case POINTER_PLUS_EXPR:
569 split_constant_offset (op0, &var0, &off0);
570 split_constant_offset (op1, &var1, &off1);
571 *var = fold_build2 (code, type, var0, var1);
572 *off = size_binop (ocode, off0, off1);
576 if (TREE_CODE (op1) != INTEGER_CST)
579 split_constant_offset (op0, &var0, &off0);
580 *var = fold_build2 (MULT_EXPR, type, var0, op1);
581 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
587 HOST_WIDE_INT pbitsize, pbitpos;
588 enum machine_mode pmode;
589 int punsignedp, pvolatilep;
591 op0 = TREE_OPERAND (op0, 0);
592 if (!handled_component_p (op0))
595 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
596 &pmode, &punsignedp, &pvolatilep, false);
598 if (pbitpos % BITS_PER_UNIT != 0)
600 base = build_fold_addr_expr (base);
601 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
605 split_constant_offset (poffset, &poffset, &off1);
606 off0 = size_binop (PLUS_EXPR, off0, off1);
607 if (POINTER_TYPE_P (TREE_TYPE (base)))
608 base = fold_build_pointer_plus (base, poffset);
610 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
611 fold_convert (TREE_TYPE (base), poffset));
614 var0 = fold_convert (type, base);
616 /* If variable length types are involved, punt, otherwise casts
617 might be converted into ARRAY_REFs in gimplify_conversion.
618 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
619 possibly no longer appears in current GIMPLE, might resurface.
620 This perhaps could run
621 if (CONVERT_EXPR_P (var0))
623 gimplify_conversion (&var0);
624 // Attempt to fill in any within var0 found ARRAY_REF's
625 // element size from corresponding op embedded ARRAY_REF,
626 // if unsuccessful, just punt.
628 while (POINTER_TYPE_P (type))
629 type = TREE_TYPE (type);
630 if (int_size_in_bytes (type) < 0)
640 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
641 enum tree_code subcode;
643 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
646 var0 = gimple_assign_rhs1 (def_stmt);
647 subcode = gimple_assign_rhs_code (def_stmt);
648 var1 = gimple_assign_rhs2 (def_stmt);
650 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
654 /* We must not introduce undefined overflow, and we must not change the value.
655 Hence we're okay if the inner type doesn't overflow to start with
656 (pointer or signed), the outer type also is an integer or pointer
657 and the outer precision is at least as large as the inner. */
658 tree itype = TREE_TYPE (op0);
659 if ((POINTER_TYPE_P (itype)
660 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
661 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
662 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
664 split_constant_offset (op0, &var0, off);
665 *var = fold_convert (type, var0);
676 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
677 will be ssizetype. */
680 split_constant_offset (tree exp, tree *var, tree *off)
682 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
686 *off = ssize_int (0);
689 if (tree_is_chrec (exp))
692 otype = TREE_TYPE (exp);
693 code = TREE_CODE (exp);
694 extract_ops_from_tree (exp, &code, &op0, &op1);
695 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
697 *var = fold_convert (type, e);
702 /* Returns the address ADDR of an object in a canonical shape (without nop
703 casts, and with type of pointer to the object). */
706 canonicalize_base_object_address (tree addr)
712 /* The base address may be obtained by casting from integer, in that case
714 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
717 if (TREE_CODE (addr) != ADDR_EXPR)
720 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
723 /* Analyzes the behavior of the memory reference DR in the innermost loop or
724 basic block that contains it. Returns true if analysis succeed or false
728 dr_analyze_innermost (struct data_reference *dr)
730 gimple stmt = DR_STMT (dr);
731 struct loop *loop = loop_containing_stmt (stmt);
732 tree ref = DR_REF (dr);
733 HOST_WIDE_INT pbitsize, pbitpos;
735 enum machine_mode pmode;
736 int punsignedp, pvolatilep;
737 affine_iv base_iv, offset_iv;
738 tree init, dinit, step;
739 bool in_loop = (loop && loop->num);
741 if (dump_file && (dump_flags & TDF_DETAILS))
742 fprintf (dump_file, "analyze_innermost: ");
744 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
745 &pmode, &punsignedp, &pvolatilep, false);
746 gcc_assert (base != NULL_TREE);
748 if (pbitpos % BITS_PER_UNIT != 0)
750 if (dump_file && (dump_flags & TDF_DETAILS))
751 fprintf (dump_file, "failed: bit offset alignment.\n");
755 if (TREE_CODE (base) == MEM_REF)
757 if (!integer_zerop (TREE_OPERAND (base, 1)))
761 double_int moff = mem_ref_offset (base);
762 poffset = double_int_to_tree (sizetype, moff);
765 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
767 base = TREE_OPERAND (base, 0);
770 base = build_fold_addr_expr (base);
773 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
776 if (dump_file && (dump_flags & TDF_DETAILS))
777 fprintf (dump_file, "failed: evolution of base is not affine.\n");
784 base_iv.step = ssize_int (0);
785 base_iv.no_overflow = true;
790 offset_iv.base = ssize_int (0);
791 offset_iv.step = ssize_int (0);
797 offset_iv.base = poffset;
798 offset_iv.step = ssize_int (0);
800 else if (!simple_iv (loop, loop_containing_stmt (stmt),
801 poffset, &offset_iv, false))
803 if (dump_file && (dump_flags & TDF_DETAILS))
804 fprintf (dump_file, "failed: evolution of offset is not"
810 init = ssize_int (pbitpos / BITS_PER_UNIT);
811 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
812 init = size_binop (PLUS_EXPR, init, dinit);
813 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
814 init = size_binop (PLUS_EXPR, init, dinit);
816 step = size_binop (PLUS_EXPR,
817 fold_convert (ssizetype, base_iv.step),
818 fold_convert (ssizetype, offset_iv.step));
820 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
822 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
826 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
828 if (dump_file && (dump_flags & TDF_DETAILS))
829 fprintf (dump_file, "success.\n");
834 /* Determines the base object and the list of indices of memory reference
835 DR, analyzed in LOOP and instantiated in loop nest NEST. */
838 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
840 VEC (tree, heap) *access_fns = NULL;
842 tree base, off, access_fn;
843 basic_block before_loop;
845 /* If analyzing a basic-block there are no indices to analyze
846 and thus no access functions. */
849 DR_BASE_OBJECT (dr) = DR_REF (dr);
850 DR_ACCESS_FNS (dr) = NULL;
854 ref = unshare_expr (DR_REF (dr));
855 before_loop = block_before_loop (nest);
857 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
858 into a two element array with a constant index. The base is
859 then just the immediate underlying object. */
860 if (TREE_CODE (ref) == REALPART_EXPR)
862 ref = TREE_OPERAND (ref, 0);
863 VEC_safe_push (tree, heap, access_fns, integer_zero_node);
865 else if (TREE_CODE (ref) == IMAGPART_EXPR)
867 ref = TREE_OPERAND (ref, 0);
868 VEC_safe_push (tree, heap, access_fns, integer_one_node);
871 /* Analyze access functions of dimensions we know to be independent. */
873 while (handled_component_p (aref))
875 if (TREE_CODE (aref) == ARRAY_REF)
877 op = TREE_OPERAND (aref, 1);
878 access_fn = analyze_scalar_evolution (loop, op);
879 access_fn = instantiate_scev (before_loop, loop, access_fn);
880 VEC_safe_push (tree, heap, access_fns, access_fn);
881 /* For ARRAY_REFs the base is the reference with the index replaced
882 by zero if we can not strip it as the outermost component. */
884 ref = TREE_OPERAND (ref, 0);
886 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
889 aref = TREE_OPERAND (aref, 0);
892 /* If the address operand of a MEM_REF base has an evolution in the
893 analyzed nest, add it as an additional independent access-function. */
894 if (TREE_CODE (aref) == MEM_REF)
896 op = TREE_OPERAND (aref, 0);
897 access_fn = analyze_scalar_evolution (loop, op);
898 access_fn = instantiate_scev (before_loop, loop, access_fn);
899 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
901 base = initial_condition (access_fn);
902 split_constant_offset (base, &base, &off);
903 /* Fold the MEM_REF offset into the evolutions initial
904 value to make more bases comparable. */
905 if (!integer_zerop (TREE_OPERAND (aref, 1)))
907 off = size_binop (PLUS_EXPR, off,
908 fold_convert (ssizetype,
909 TREE_OPERAND (aref, 1)));
910 TREE_OPERAND (aref, 1)
911 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
913 access_fn = chrec_replace_initial_condition
914 (access_fn, fold_convert (TREE_TYPE (base), off));
915 TREE_OPERAND (aref, 0) = base;
916 VEC_safe_push (tree, heap, access_fns, access_fn);
920 DR_BASE_OBJECT (dr) = ref;
921 DR_ACCESS_FNS (dr) = access_fns;
924 /* Extracts the alias analysis information from the memory reference DR. */
927 dr_analyze_alias (struct data_reference *dr)
929 tree ref = DR_REF (dr);
930 tree base = get_base_address (ref), addr;
932 if (INDIRECT_REF_P (base)
933 || TREE_CODE (base) == MEM_REF)
935 addr = TREE_OPERAND (base, 0);
936 if (TREE_CODE (addr) == SSA_NAME)
937 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
941 /* Frees data reference DR. */
944 free_data_ref (data_reference_p dr)
946 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
950 /* Analyzes memory reference MEMREF accessed in STMT. The reference
951 is read if IS_READ is true, write otherwise. Returns the
952 data_reference description of MEMREF. NEST is the outermost loop
953 in which the reference should be instantiated, LOOP is the loop in
954 which the data reference should be analyzed. */
956 struct data_reference *
957 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
960 struct data_reference *dr;
962 if (dump_file && (dump_flags & TDF_DETAILS))
964 fprintf (dump_file, "Creating dr for ");
965 print_generic_expr (dump_file, memref, TDF_SLIM);
966 fprintf (dump_file, "\n");
969 dr = XCNEW (struct data_reference);
971 DR_REF (dr) = memref;
972 DR_IS_READ (dr) = is_read;
974 dr_analyze_innermost (dr);
975 dr_analyze_indices (dr, nest, loop);
976 dr_analyze_alias (dr);
978 if (dump_file && (dump_flags & TDF_DETAILS))
981 fprintf (dump_file, "\tbase_address: ");
982 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
983 fprintf (dump_file, "\n\toffset from base address: ");
984 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
985 fprintf (dump_file, "\n\tconstant offset from base address: ");
986 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
987 fprintf (dump_file, "\n\tstep: ");
988 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
989 fprintf (dump_file, "\n\taligned to: ");
990 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
991 fprintf (dump_file, "\n\tbase_object: ");
992 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
993 fprintf (dump_file, "\n");
994 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
996 fprintf (dump_file, "\tAccess function %d: ", i);
997 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1004 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1007 dr_equal_offsets_p1 (tree offset1, tree offset2)
1011 STRIP_NOPS (offset1);
1012 STRIP_NOPS (offset2);
1014 if (offset1 == offset2)
1017 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1018 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1021 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1022 TREE_OPERAND (offset2, 0));
1024 if (!res || !BINARY_CLASS_P (offset1))
1027 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1028 TREE_OPERAND (offset2, 1));
1033 /* Check if DRA and DRB have equal offsets. */
1035 dr_equal_offsets_p (struct data_reference *dra,
1036 struct data_reference *drb)
1038 tree offset1, offset2;
1040 offset1 = DR_OFFSET (dra);
1041 offset2 = DR_OFFSET (drb);
1043 return dr_equal_offsets_p1 (offset1, offset2);
1046 /* Returns true if FNA == FNB. */
1049 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1051 unsigned i, n = VEC_length (tree, fna);
1053 if (n != VEC_length (tree, fnb))
1056 for (i = 0; i < n; i++)
1057 if (!operand_equal_p (VEC_index (tree, fna, i),
1058 VEC_index (tree, fnb, i), 0))
1064 /* If all the functions in CF are the same, returns one of them,
1065 otherwise returns NULL. */
1068 common_affine_function (conflict_function *cf)
1073 if (!CF_NONTRIVIAL_P (cf))
1078 for (i = 1; i < cf->n; i++)
1079 if (!affine_function_equal_p (comm, cf->fns[i]))
1085 /* Returns the base of the affine function FN. */
1088 affine_function_base (affine_fn fn)
1090 return VEC_index (tree, fn, 0);
1093 /* Returns true if FN is a constant. */
1096 affine_function_constant_p (affine_fn fn)
1101 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1102 if (!integer_zerop (coef))
1108 /* Returns true if FN is the zero constant function. */
1111 affine_function_zero_p (affine_fn fn)
1113 return (integer_zerop (affine_function_base (fn))
1114 && affine_function_constant_p (fn));
1117 /* Returns a signed integer type with the largest precision from TA
1121 signed_type_for_types (tree ta, tree tb)
1123 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1124 return signed_type_for (ta);
1126 return signed_type_for (tb);
1129 /* Applies operation OP on affine functions FNA and FNB, and returns the
1133 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1139 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1141 n = VEC_length (tree, fna);
1142 m = VEC_length (tree, fnb);
1146 n = VEC_length (tree, fnb);
1147 m = VEC_length (tree, fna);
1150 ret = VEC_alloc (tree, heap, m);
1151 for (i = 0; i < n; i++)
1153 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1154 TREE_TYPE (VEC_index (tree, fnb, i)));
1156 VEC_quick_push (tree, ret,
1157 fold_build2 (op, type,
1158 VEC_index (tree, fna, i),
1159 VEC_index (tree, fnb, i)));
1162 for (; VEC_iterate (tree, fna, i, coef); i++)
1163 VEC_quick_push (tree, ret,
1164 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1165 coef, integer_zero_node));
1166 for (; VEC_iterate (tree, fnb, i, coef); i++)
1167 VEC_quick_push (tree, ret,
1168 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1169 integer_zero_node, coef));
1174 /* Returns the sum of affine functions FNA and FNB. */
1177 affine_fn_plus (affine_fn fna, affine_fn fnb)
1179 return affine_fn_op (PLUS_EXPR, fna, fnb);
1182 /* Returns the difference of affine functions FNA and FNB. */
1185 affine_fn_minus (affine_fn fna, affine_fn fnb)
1187 return affine_fn_op (MINUS_EXPR, fna, fnb);
1190 /* Frees affine function FN. */
1193 affine_fn_free (affine_fn fn)
1195 VEC_free (tree, heap, fn);
1198 /* Determine for each subscript in the data dependence relation DDR
1202 compute_subscript_distance (struct data_dependence_relation *ddr)
1204 conflict_function *cf_a, *cf_b;
1205 affine_fn fn_a, fn_b, diff;
1207 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1211 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1213 struct subscript *subscript;
1215 subscript = DDR_SUBSCRIPT (ddr, i);
1216 cf_a = SUB_CONFLICTS_IN_A (subscript);
1217 cf_b = SUB_CONFLICTS_IN_B (subscript);
1219 fn_a = common_affine_function (cf_a);
1220 fn_b = common_affine_function (cf_b);
1223 SUB_DISTANCE (subscript) = chrec_dont_know;
1226 diff = affine_fn_minus (fn_a, fn_b);
1228 if (affine_function_constant_p (diff))
1229 SUB_DISTANCE (subscript) = affine_function_base (diff);
1231 SUB_DISTANCE (subscript) = chrec_dont_know;
1233 affine_fn_free (diff);
1238 /* Returns the conflict function for "unknown". */
1240 static conflict_function *
1241 conflict_fn_not_known (void)
1243 conflict_function *fn = XCNEW (conflict_function);
1249 /* Returns the conflict function for "independent". */
1251 static conflict_function *
1252 conflict_fn_no_dependence (void)
1254 conflict_function *fn = XCNEW (conflict_function);
1255 fn->n = NO_DEPENDENCE;
1260 /* Returns true if the address of OBJ is invariant in LOOP. */
1263 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1265 while (handled_component_p (obj))
1267 if (TREE_CODE (obj) == ARRAY_REF)
1269 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1270 need to check the stride and the lower bound of the reference. */
1271 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1273 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1277 else if (TREE_CODE (obj) == COMPONENT_REF)
1279 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1283 obj = TREE_OPERAND (obj, 0);
1286 if (!INDIRECT_REF_P (obj)
1287 && TREE_CODE (obj) != MEM_REF)
1290 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1294 /* Returns false if we can prove that data references A and B do not alias,
1295 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1299 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1302 tree addr_a = DR_BASE_OBJECT (a);
1303 tree addr_b = DR_BASE_OBJECT (b);
1305 /* If we are not processing a loop nest but scalar code we
1306 do not need to care about possible cross-iteration dependences
1307 and thus can process the full original reference. Do so,
1308 similar to how loop invariant motion applies extra offset-based
1312 aff_tree off1, off2;
1313 double_int size1, size2;
1314 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1315 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1316 aff_combination_scale (&off1, double_int_minus_one);
1317 aff_combination_add (&off2, &off1);
1318 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1322 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1323 return refs_output_dependent_p (addr_a, addr_b);
1324 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1325 return refs_anti_dependent_p (addr_a, addr_b);
1326 return refs_may_alias_p (addr_a, addr_b);
1329 static void compute_self_dependence (struct data_dependence_relation *);
1331 /* Initialize a data dependence relation between data accesses A and
1332 B. NB_LOOPS is the number of loops surrounding the references: the
1333 size of the classic distance/direction vectors. */
1335 static struct data_dependence_relation *
1336 initialize_data_dependence_relation (struct data_reference *a,
1337 struct data_reference *b,
1338 VEC (loop_p, heap) *loop_nest)
1340 struct data_dependence_relation *res;
1343 res = XNEW (struct data_dependence_relation);
1346 DDR_LOOP_NEST (res) = NULL;
1347 DDR_REVERSED_P (res) = false;
1348 DDR_SUBSCRIPTS (res) = NULL;
1349 DDR_DIR_VECTS (res) = NULL;
1350 DDR_DIST_VECTS (res) = NULL;
1352 if (a == NULL || b == NULL)
1354 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1358 /* If the data references do not alias, then they are independent. */
1359 if (!dr_may_alias_p (a, b, loop_nest != NULL))
1361 DDR_ARE_DEPENDENT (res) = chrec_known;
1365 /* When the references are exactly the same, don't spend time doing
1366 the data dependence tests, just initialize the ddr and return. */
1367 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1369 DDR_AFFINE_P (res) = true;
1370 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1371 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1372 DDR_LOOP_NEST (res) = loop_nest;
1373 DDR_INNER_LOOP (res) = 0;
1374 DDR_SELF_REFERENCE (res) = true;
1375 compute_self_dependence (res);
1379 /* If the references do not access the same object, we do not know
1380 whether they alias or not. */
1381 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1383 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1387 /* If the base of the object is not invariant in the loop nest, we cannot
1388 analyze it. TODO -- in fact, it would suffice to record that there may
1389 be arbitrary dependences in the loops where the base object varies. */
1391 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1392 DR_BASE_OBJECT (a)))
1394 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1398 /* If the number of dimensions of the access to not agree we can have
1399 a pointer access to a component of the array element type and an
1400 array access while the base-objects are still the same. Punt. */
1401 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1403 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1407 DDR_AFFINE_P (res) = true;
1408 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1409 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1410 DDR_LOOP_NEST (res) = loop_nest;
1411 DDR_INNER_LOOP (res) = 0;
1412 DDR_SELF_REFERENCE (res) = false;
1414 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1416 struct subscript *subscript;
1418 subscript = XNEW (struct subscript);
1419 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1420 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1421 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1422 SUB_DISTANCE (subscript) = chrec_dont_know;
1423 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1429 /* Frees memory used by the conflict function F. */
1432 free_conflict_function (conflict_function *f)
1436 if (CF_NONTRIVIAL_P (f))
1438 for (i = 0; i < f->n; i++)
1439 affine_fn_free (f->fns[i]);
1444 /* Frees memory used by SUBSCRIPTS. */
1447 free_subscripts (VEC (subscript_p, heap) *subscripts)
1452 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1454 free_conflict_function (s->conflicting_iterations_in_a);
1455 free_conflict_function (s->conflicting_iterations_in_b);
1458 VEC_free (subscript_p, heap, subscripts);
1461 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1465 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1468 if (dump_file && (dump_flags & TDF_DETAILS))
1470 fprintf (dump_file, "(dependence classified: ");
1471 print_generic_expr (dump_file, chrec, 0);
1472 fprintf (dump_file, ")\n");
1475 DDR_ARE_DEPENDENT (ddr) = chrec;
1476 free_subscripts (DDR_SUBSCRIPTS (ddr));
1477 DDR_SUBSCRIPTS (ddr) = NULL;
1480 /* The dependence relation DDR cannot be represented by a distance
1484 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1486 if (dump_file && (dump_flags & TDF_DETAILS))
1487 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1489 DDR_AFFINE_P (ddr) = false;
1494 /* This section contains the classic Banerjee tests. */
1496 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1497 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1500 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1502 return (evolution_function_is_constant_p (chrec_a)
1503 && evolution_function_is_constant_p (chrec_b));
1506 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1507 variable, i.e., if the SIV (Single Index Variable) test is true. */
1510 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1512 if ((evolution_function_is_constant_p (chrec_a)
1513 && evolution_function_is_univariate_p (chrec_b))
1514 || (evolution_function_is_constant_p (chrec_b)
1515 && evolution_function_is_univariate_p (chrec_a)))
1518 if (evolution_function_is_univariate_p (chrec_a)
1519 && evolution_function_is_univariate_p (chrec_b))
1521 switch (TREE_CODE (chrec_a))
1523 case POLYNOMIAL_CHREC:
1524 switch (TREE_CODE (chrec_b))
1526 case POLYNOMIAL_CHREC:
1527 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1542 /* Creates a conflict function with N dimensions. The affine functions
1543 in each dimension follow. */
1545 static conflict_function *
1546 conflict_fn (unsigned n, ...)
1549 conflict_function *ret = XCNEW (conflict_function);
1552 gcc_assert (0 < n && n <= MAX_DIM);
1556 for (i = 0; i < n; i++)
1557 ret->fns[i] = va_arg (ap, affine_fn);
1563 /* Returns constant affine function with value CST. */
1566 affine_fn_cst (tree cst)
1568 affine_fn fn = VEC_alloc (tree, heap, 1);
1569 VEC_quick_push (tree, fn, cst);
1573 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1576 affine_fn_univar (tree cst, unsigned dim, tree coef)
1578 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1581 gcc_assert (dim > 0);
1582 VEC_quick_push (tree, fn, cst);
1583 for (i = 1; i < dim; i++)
1584 VEC_quick_push (tree, fn, integer_zero_node);
1585 VEC_quick_push (tree, fn, coef);
1589 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1590 *OVERLAPS_B are initialized to the functions that describe the
1591 relation between the elements accessed twice by CHREC_A and
1592 CHREC_B. For k >= 0, the following property is verified:
1594 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1597 analyze_ziv_subscript (tree chrec_a,
1599 conflict_function **overlaps_a,
1600 conflict_function **overlaps_b,
1601 tree *last_conflicts)
1603 tree type, difference;
1604 dependence_stats.num_ziv++;
1606 if (dump_file && (dump_flags & TDF_DETAILS))
1607 fprintf (dump_file, "(analyze_ziv_subscript \n");
1609 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1610 chrec_a = chrec_convert (type, chrec_a, NULL);
1611 chrec_b = chrec_convert (type, chrec_b, NULL);
1612 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1614 switch (TREE_CODE (difference))
1617 if (integer_zerop (difference))
1619 /* The difference is equal to zero: the accessed index
1620 overlaps for each iteration in the loop. */
1621 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1622 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1623 *last_conflicts = chrec_dont_know;
1624 dependence_stats.num_ziv_dependent++;
1628 /* The accesses do not overlap. */
1629 *overlaps_a = conflict_fn_no_dependence ();
1630 *overlaps_b = conflict_fn_no_dependence ();
1631 *last_conflicts = integer_zero_node;
1632 dependence_stats.num_ziv_independent++;
1637 /* We're not sure whether the indexes overlap. For the moment,
1638 conservatively answer "don't know". */
1639 if (dump_file && (dump_flags & TDF_DETAILS))
1640 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1642 *overlaps_a = conflict_fn_not_known ();
1643 *overlaps_b = conflict_fn_not_known ();
1644 *last_conflicts = chrec_dont_know;
1645 dependence_stats.num_ziv_unimplemented++;
1649 if (dump_file && (dump_flags & TDF_DETAILS))
1650 fprintf (dump_file, ")\n");
1653 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1654 and only if it fits to the int type. If this is not the case, or the
1655 bound on the number of iterations of LOOP could not be derived, returns
1659 max_stmt_executions_tree (struct loop *loop)
1663 if (!max_stmt_executions (loop, true, &nit))
1664 return chrec_dont_know;
1666 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1667 return chrec_dont_know;
1669 return double_int_to_tree (unsigned_type_node, nit);
1672 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1673 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1674 *OVERLAPS_B are initialized to the functions that describe the
1675 relation between the elements accessed twice by CHREC_A and
1676 CHREC_B. For k >= 0, the following property is verified:
1678 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1681 analyze_siv_subscript_cst_affine (tree chrec_a,
1683 conflict_function **overlaps_a,
1684 conflict_function **overlaps_b,
1685 tree *last_conflicts)
1687 bool value0, value1, value2;
1688 tree type, difference, tmp;
1690 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1691 chrec_a = chrec_convert (type, chrec_a, NULL);
1692 chrec_b = chrec_convert (type, chrec_b, NULL);
1693 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1695 if (!chrec_is_positive (initial_condition (difference), &value0))
1697 if (dump_file && (dump_flags & TDF_DETAILS))
1698 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1700 dependence_stats.num_siv_unimplemented++;
1701 *overlaps_a = conflict_fn_not_known ();
1702 *overlaps_b = conflict_fn_not_known ();
1703 *last_conflicts = chrec_dont_know;
1708 if (value0 == false)
1710 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1712 if (dump_file && (dump_flags & TDF_DETAILS))
1713 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1715 *overlaps_a = conflict_fn_not_known ();
1716 *overlaps_b = conflict_fn_not_known ();
1717 *last_conflicts = chrec_dont_know;
1718 dependence_stats.num_siv_unimplemented++;
1727 chrec_b = {10, +, 1}
1730 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1732 HOST_WIDE_INT numiter;
1733 struct loop *loop = get_chrec_loop (chrec_b);
1735 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1736 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1737 fold_build1 (ABS_EXPR, type, difference),
1738 CHREC_RIGHT (chrec_b));
1739 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1740 *last_conflicts = integer_one_node;
1743 /* Perform weak-zero siv test to see if overlap is
1744 outside the loop bounds. */
1745 numiter = max_stmt_executions_int (loop, true);
1748 && compare_tree_int (tmp, numiter) > 0)
1750 free_conflict_function (*overlaps_a);
1751 free_conflict_function (*overlaps_b);
1752 *overlaps_a = conflict_fn_no_dependence ();
1753 *overlaps_b = conflict_fn_no_dependence ();
1754 *last_conflicts = integer_zero_node;
1755 dependence_stats.num_siv_independent++;
1758 dependence_stats.num_siv_dependent++;
1762 /* When the step does not divide the difference, there are
1766 *overlaps_a = conflict_fn_no_dependence ();
1767 *overlaps_b = conflict_fn_no_dependence ();
1768 *last_conflicts = integer_zero_node;
1769 dependence_stats.num_siv_independent++;
1778 chrec_b = {10, +, -1}
1780 In this case, chrec_a will not overlap with chrec_b. */
1781 *overlaps_a = conflict_fn_no_dependence ();
1782 *overlaps_b = conflict_fn_no_dependence ();
1783 *last_conflicts = integer_zero_node;
1784 dependence_stats.num_siv_independent++;
1791 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1793 if (dump_file && (dump_flags & TDF_DETAILS))
1794 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1796 *overlaps_a = conflict_fn_not_known ();
1797 *overlaps_b = conflict_fn_not_known ();
1798 *last_conflicts = chrec_dont_know;
1799 dependence_stats.num_siv_unimplemented++;
1804 if (value2 == false)
1808 chrec_b = {10, +, -1}
1810 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1812 HOST_WIDE_INT numiter;
1813 struct loop *loop = get_chrec_loop (chrec_b);
1815 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1816 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1817 CHREC_RIGHT (chrec_b));
1818 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1819 *last_conflicts = integer_one_node;
1821 /* Perform weak-zero siv test to see if overlap is
1822 outside the loop bounds. */
1823 numiter = max_stmt_executions_int (loop, true);
1826 && compare_tree_int (tmp, numiter) > 0)
1828 free_conflict_function (*overlaps_a);
1829 free_conflict_function (*overlaps_b);
1830 *overlaps_a = conflict_fn_no_dependence ();
1831 *overlaps_b = conflict_fn_no_dependence ();
1832 *last_conflicts = integer_zero_node;
1833 dependence_stats.num_siv_independent++;
1836 dependence_stats.num_siv_dependent++;
1840 /* When the step does not divide the difference, there
1844 *overlaps_a = conflict_fn_no_dependence ();
1845 *overlaps_b = conflict_fn_no_dependence ();
1846 *last_conflicts = integer_zero_node;
1847 dependence_stats.num_siv_independent++;
1857 In this case, chrec_a will not overlap with chrec_b. */
1858 *overlaps_a = conflict_fn_no_dependence ();
1859 *overlaps_b = conflict_fn_no_dependence ();
1860 *last_conflicts = integer_zero_node;
1861 dependence_stats.num_siv_independent++;
1869 /* Helper recursive function for initializing the matrix A. Returns
1870 the initial value of CHREC. */
1873 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1877 switch (TREE_CODE (chrec))
1879 case POLYNOMIAL_CHREC:
1880 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1882 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1883 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1889 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1890 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1892 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1897 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1898 return chrec_convert (chrec_type (chrec), op, NULL);
1903 /* Handle ~X as -1 - X. */
1904 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1905 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1906 build_int_cst (TREE_TYPE (chrec), -1), op);
1918 #define FLOOR_DIV(x,y) ((x) / (y))
1920 /* Solves the special case of the Diophantine equation:
1921 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1923 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1924 number of iterations that loops X and Y run. The overlaps will be
1925 constructed as evolutions in dimension DIM. */
1928 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1929 affine_fn *overlaps_a,
1930 affine_fn *overlaps_b,
1931 tree *last_conflicts, int dim)
1933 if (((step_a > 0 && step_b > 0)
1934 || (step_a < 0 && step_b < 0)))
1936 int step_overlaps_a, step_overlaps_b;
1937 int gcd_steps_a_b, last_conflict, tau2;
1939 gcd_steps_a_b = gcd (step_a, step_b);
1940 step_overlaps_a = step_b / gcd_steps_a_b;
1941 step_overlaps_b = step_a / gcd_steps_a_b;
1945 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1946 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1947 last_conflict = tau2;
1948 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1951 *last_conflicts = chrec_dont_know;
1953 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1954 build_int_cst (NULL_TREE,
1956 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1957 build_int_cst (NULL_TREE,
1963 *overlaps_a = affine_fn_cst (integer_zero_node);
1964 *overlaps_b = affine_fn_cst (integer_zero_node);
1965 *last_conflicts = integer_zero_node;
1969 /* Solves the special case of a Diophantine equation where CHREC_A is
1970 an affine bivariate function, and CHREC_B is an affine univariate
1971 function. For example,
1973 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1975 has the following overlapping functions:
1977 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1978 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1979 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1981 FORNOW: This is a specialized implementation for a case occurring in
1982 a common benchmark. Implement the general algorithm. */
1985 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1986 conflict_function **overlaps_a,
1987 conflict_function **overlaps_b,
1988 tree *last_conflicts)
1990 bool xz_p, yz_p, xyz_p;
1991 int step_x, step_y, step_z;
1992 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1993 affine_fn overlaps_a_xz, overlaps_b_xz;
1994 affine_fn overlaps_a_yz, overlaps_b_yz;
1995 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1996 affine_fn ova1, ova2, ovb;
1997 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1999 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2000 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2001 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2004 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
2005 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2006 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2008 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2010 if (dump_file && (dump_flags & TDF_DETAILS))
2011 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2013 *overlaps_a = conflict_fn_not_known ();
2014 *overlaps_b = conflict_fn_not_known ();
2015 *last_conflicts = chrec_dont_know;
2019 niter = MIN (niter_x, niter_z);
2020 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2023 &last_conflicts_xz, 1);
2024 niter = MIN (niter_y, niter_z);
2025 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2028 &last_conflicts_yz, 2);
2029 niter = MIN (niter_x, niter_z);
2030 niter = MIN (niter_y, niter);
2031 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2034 &last_conflicts_xyz, 3);
2036 xz_p = !integer_zerop (last_conflicts_xz);
2037 yz_p = !integer_zerop (last_conflicts_yz);
2038 xyz_p = !integer_zerop (last_conflicts_xyz);
2040 if (xz_p || yz_p || xyz_p)
2042 ova1 = affine_fn_cst (integer_zero_node);
2043 ova2 = affine_fn_cst (integer_zero_node);
2044 ovb = affine_fn_cst (integer_zero_node);
2047 affine_fn t0 = ova1;
2050 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2051 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2052 affine_fn_free (t0);
2053 affine_fn_free (t2);
2054 *last_conflicts = last_conflicts_xz;
2058 affine_fn t0 = ova2;
2061 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2062 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2063 affine_fn_free (t0);
2064 affine_fn_free (t2);
2065 *last_conflicts = last_conflicts_yz;
2069 affine_fn t0 = ova1;
2070 affine_fn t2 = ova2;
2073 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2074 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2075 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2076 affine_fn_free (t0);
2077 affine_fn_free (t2);
2078 affine_fn_free (t4);
2079 *last_conflicts = last_conflicts_xyz;
2081 *overlaps_a = conflict_fn (2, ova1, ova2);
2082 *overlaps_b = conflict_fn (1, ovb);
2086 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2087 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2088 *last_conflicts = integer_zero_node;
2091 affine_fn_free (overlaps_a_xz);
2092 affine_fn_free (overlaps_b_xz);
2093 affine_fn_free (overlaps_a_yz);
2094 affine_fn_free (overlaps_b_yz);
2095 affine_fn_free (overlaps_a_xyz);
2096 affine_fn_free (overlaps_b_xyz);
2099 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2102 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2105 memcpy (vec2, vec1, size * sizeof (*vec1));
2108 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2111 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2116 for (i = 0; i < m; i++)
2117 lambda_vector_copy (mat1[i], mat2[i], n);
2120 /* Store the N x N identity matrix in MAT. */
2123 lambda_matrix_id (lambda_matrix mat, int size)
2127 for (i = 0; i < size; i++)
2128 for (j = 0; j < size; j++)
2129 mat[i][j] = (i == j) ? 1 : 0;
2132 /* Return the first nonzero element of vector VEC1 between START and N.
2133 We must have START <= N. Returns N if VEC1 is the zero vector. */
2136 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2139 while (j < n && vec1[j] == 0)
2144 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2145 R2 = R2 + CONST1 * R1. */
2148 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2155 for (i = 0; i < n; i++)
2156 mat[r2][i] += const1 * mat[r1][i];
2159 /* Swap rows R1 and R2 in matrix MAT. */
2162 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2171 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2172 and store the result in VEC2. */
2175 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2176 int size, int const1)
2181 lambda_vector_clear (vec2, size);
2183 for (i = 0; i < size; i++)
2184 vec2[i] = const1 * vec1[i];
2187 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2190 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2193 lambda_vector_mult_const (vec1, vec2, size, -1);
2196 /* Negate row R1 of matrix MAT which has N columns. */
2199 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2201 lambda_vector_negate (mat[r1], mat[r1], n);
2204 /* Return true if two vectors are equal. */
2207 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2210 for (i = 0; i < size; i++)
2211 if (vec1[i] != vec2[i])
2216 /* Given an M x N integer matrix A, this function determines an M x
2217 M unimodular matrix U, and an M x N echelon matrix S such that
2218 "U.A = S". This decomposition is also known as "right Hermite".
2220 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2221 Restructuring Compilers" Utpal Banerjee. */
2224 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2225 lambda_matrix S, lambda_matrix U)
2229 lambda_matrix_copy (A, S, m, n);
2230 lambda_matrix_id (U, m);
2232 for (j = 0; j < n; j++)
2234 if (lambda_vector_first_nz (S[j], m, i0) < m)
2237 for (i = m - 1; i >= i0; i--)
2239 while (S[i][j] != 0)
2241 int sigma, factor, a, b;
2245 sigma = (a * b < 0) ? -1: 1;
2248 factor = sigma * (a / b);
2250 lambda_matrix_row_add (S, n, i, i-1, -factor);
2251 lambda_matrix_row_exchange (S, i, i-1);
2253 lambda_matrix_row_add (U, m, i, i-1, -factor);
2254 lambda_matrix_row_exchange (U, i, i-1);
2261 /* Determines the overlapping elements due to accesses CHREC_A and
2262 CHREC_B, that are affine functions. This function cannot handle
2263 symbolic evolution functions, ie. when initial conditions are
2264 parameters, because it uses lambda matrices of integers. */
2267 analyze_subscript_affine_affine (tree chrec_a,
2269 conflict_function **overlaps_a,
2270 conflict_function **overlaps_b,
2271 tree *last_conflicts)
2273 unsigned nb_vars_a, nb_vars_b, dim;
2274 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2275 lambda_matrix A, U, S;
2276 struct obstack scratch_obstack;
2278 if (eq_evolutions_p (chrec_a, chrec_b))
2280 /* The accessed index overlaps for each iteration in the
2282 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2283 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2284 *last_conflicts = chrec_dont_know;
2287 if (dump_file && (dump_flags & TDF_DETAILS))
2288 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2290 /* For determining the initial intersection, we have to solve a
2291 Diophantine equation. This is the most time consuming part.
2293 For answering to the question: "Is there a dependence?" we have
2294 to prove that there exists a solution to the Diophantine
2295 equation, and that the solution is in the iteration domain,
2296 i.e. the solution is positive or zero, and that the solution
2297 happens before the upper bound loop.nb_iterations. Otherwise
2298 there is no dependence. This function outputs a description of
2299 the iterations that hold the intersections. */
2301 nb_vars_a = nb_vars_in_chrec (chrec_a);
2302 nb_vars_b = nb_vars_in_chrec (chrec_b);
2304 gcc_obstack_init (&scratch_obstack);
2306 dim = nb_vars_a + nb_vars_b;
2307 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2308 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2309 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2311 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2312 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2313 gamma = init_b - init_a;
2315 /* Don't do all the hard work of solving the Diophantine equation
2316 when we already know the solution: for example,
2319 | gamma = 3 - 3 = 0.
2320 Then the first overlap occurs during the first iterations:
2321 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2325 if (nb_vars_a == 1 && nb_vars_b == 1)
2327 HOST_WIDE_INT step_a, step_b;
2328 HOST_WIDE_INT niter, niter_a, niter_b;
2331 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2332 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2333 niter = MIN (niter_a, niter_b);
2334 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2335 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2337 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2340 *overlaps_a = conflict_fn (1, ova);
2341 *overlaps_b = conflict_fn (1, ovb);
2344 else if (nb_vars_a == 2 && nb_vars_b == 1)
2345 compute_overlap_steps_for_affine_1_2
2346 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2348 else if (nb_vars_a == 1 && nb_vars_b == 2)
2349 compute_overlap_steps_for_affine_1_2
2350 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2354 if (dump_file && (dump_flags & TDF_DETAILS))
2355 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2356 *overlaps_a = conflict_fn_not_known ();
2357 *overlaps_b = conflict_fn_not_known ();
2358 *last_conflicts = chrec_dont_know;
2360 goto end_analyze_subs_aa;
2364 lambda_matrix_right_hermite (A, dim, 1, S, U);
2369 lambda_matrix_row_negate (U, dim, 0);
2371 gcd_alpha_beta = S[0][0];
2373 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2374 but that is a quite strange case. Instead of ICEing, answer
2376 if (gcd_alpha_beta == 0)
2378 *overlaps_a = conflict_fn_not_known ();
2379 *overlaps_b = conflict_fn_not_known ();
2380 *last_conflicts = chrec_dont_know;
2381 goto end_analyze_subs_aa;
2384 /* The classic "gcd-test". */
2385 if (!int_divides_p (gcd_alpha_beta, gamma))
2387 /* The "gcd-test" has determined that there is no integer
2388 solution, i.e. there is no dependence. */
2389 *overlaps_a = conflict_fn_no_dependence ();
2390 *overlaps_b = conflict_fn_no_dependence ();
2391 *last_conflicts = integer_zero_node;
2394 /* Both access functions are univariate. This includes SIV and MIV cases. */
2395 else if (nb_vars_a == 1 && nb_vars_b == 1)
2397 /* Both functions should have the same evolution sign. */
2398 if (((A[0][0] > 0 && -A[1][0] > 0)
2399 || (A[0][0] < 0 && -A[1][0] < 0)))
2401 /* The solutions are given by:
2403 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2406 For a given integer t. Using the following variables,
2408 | i0 = u11 * gamma / gcd_alpha_beta
2409 | j0 = u12 * gamma / gcd_alpha_beta
2416 | y0 = j0 + j1 * t. */
2417 HOST_WIDE_INT i0, j0, i1, j1;
2419 i0 = U[0][0] * gamma / gcd_alpha_beta;
2420 j0 = U[0][1] * gamma / gcd_alpha_beta;
2424 if ((i1 == 0 && i0 < 0)
2425 || (j1 == 0 && j0 < 0))
2427 /* There is no solution.
2428 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2429 falls in here, but for the moment we don't look at the
2430 upper bound of the iteration domain. */
2431 *overlaps_a = conflict_fn_no_dependence ();
2432 *overlaps_b = conflict_fn_no_dependence ();
2433 *last_conflicts = integer_zero_node;
2434 goto end_analyze_subs_aa;
2437 if (i1 > 0 && j1 > 0)
2439 HOST_WIDE_INT niter_a = max_stmt_executions_int
2440 (get_chrec_loop (chrec_a), true);
2441 HOST_WIDE_INT niter_b = max_stmt_executions_int
2442 (get_chrec_loop (chrec_b), true);
2443 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2445 /* (X0, Y0) is a solution of the Diophantine equation:
2446 "chrec_a (X0) = chrec_b (Y0)". */
2447 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2449 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2450 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2452 /* (X1, Y1) is the smallest positive solution of the eq
2453 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2454 first conflict occurs. */
2455 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2456 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2457 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2461 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2462 FLOOR_DIV (niter - j0, j1));
2463 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2465 /* If the overlap occurs outside of the bounds of the
2466 loop, there is no dependence. */
2467 if (x1 >= niter || y1 >= niter)
2469 *overlaps_a = conflict_fn_no_dependence ();
2470 *overlaps_b = conflict_fn_no_dependence ();
2471 *last_conflicts = integer_zero_node;
2472 goto end_analyze_subs_aa;
2475 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2478 *last_conflicts = chrec_dont_know;
2482 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2484 build_int_cst (NULL_TREE, i1)));
2487 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2489 build_int_cst (NULL_TREE, j1)));
2493 /* FIXME: For the moment, the upper bound of the
2494 iteration domain for i and j is not checked. */
2495 if (dump_file && (dump_flags & TDF_DETAILS))
2496 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2497 *overlaps_a = conflict_fn_not_known ();
2498 *overlaps_b = conflict_fn_not_known ();
2499 *last_conflicts = chrec_dont_know;
2504 if (dump_file && (dump_flags & TDF_DETAILS))
2505 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2506 *overlaps_a = conflict_fn_not_known ();
2507 *overlaps_b = conflict_fn_not_known ();
2508 *last_conflicts = chrec_dont_know;
2513 if (dump_file && (dump_flags & TDF_DETAILS))
2514 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2515 *overlaps_a = conflict_fn_not_known ();
2516 *overlaps_b = conflict_fn_not_known ();
2517 *last_conflicts = chrec_dont_know;
2520 end_analyze_subs_aa:
2521 obstack_free (&scratch_obstack, NULL);
2522 if (dump_file && (dump_flags & TDF_DETAILS))
2524 fprintf (dump_file, " (overlaps_a = ");
2525 dump_conflict_function (dump_file, *overlaps_a);
2526 fprintf (dump_file, ")\n (overlaps_b = ");
2527 dump_conflict_function (dump_file, *overlaps_b);
2528 fprintf (dump_file, ")\n");
2529 fprintf (dump_file, ")\n");
2533 /* Returns true when analyze_subscript_affine_affine can be used for
2534 determining the dependence relation between chrec_a and chrec_b,
2535 that contain symbols. This function modifies chrec_a and chrec_b
2536 such that the analysis result is the same, and such that they don't
2537 contain symbols, and then can safely be passed to the analyzer.
2539 Example: The analysis of the following tuples of evolutions produce
2540 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2543 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2544 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2548 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2550 tree diff, type, left_a, left_b, right_b;
2552 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2553 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2554 /* FIXME: For the moment not handled. Might be refined later. */
2557 type = chrec_type (*chrec_a);
2558 left_a = CHREC_LEFT (*chrec_a);
2559 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2560 diff = chrec_fold_minus (type, left_a, left_b);
2562 if (!evolution_function_is_constant_p (diff))
2565 if (dump_file && (dump_flags & TDF_DETAILS))
2566 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2568 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2569 diff, CHREC_RIGHT (*chrec_a));
2570 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2571 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2572 build_int_cst (type, 0),
2577 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2578 *OVERLAPS_B are initialized to the functions that describe the
2579 relation between the elements accessed twice by CHREC_A and
2580 CHREC_B. For k >= 0, the following property is verified:
2582 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2585 analyze_siv_subscript (tree chrec_a,
2587 conflict_function **overlaps_a,
2588 conflict_function **overlaps_b,
2589 tree *last_conflicts,
2592 dependence_stats.num_siv++;
2594 if (dump_file && (dump_flags & TDF_DETAILS))
2595 fprintf (dump_file, "(analyze_siv_subscript \n");
2597 if (evolution_function_is_constant_p (chrec_a)
2598 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2599 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2600 overlaps_a, overlaps_b, last_conflicts);
2602 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2603 && evolution_function_is_constant_p (chrec_b))
2604 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2605 overlaps_b, overlaps_a, last_conflicts);
2607 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2608 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2610 if (!chrec_contains_symbols (chrec_a)
2611 && !chrec_contains_symbols (chrec_b))
2613 analyze_subscript_affine_affine (chrec_a, chrec_b,
2614 overlaps_a, overlaps_b,
2617 if (CF_NOT_KNOWN_P (*overlaps_a)
2618 || CF_NOT_KNOWN_P (*overlaps_b))
2619 dependence_stats.num_siv_unimplemented++;
2620 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2621 || CF_NO_DEPENDENCE_P (*overlaps_b))
2622 dependence_stats.num_siv_independent++;
2624 dependence_stats.num_siv_dependent++;
2626 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2629 analyze_subscript_affine_affine (chrec_a, chrec_b,
2630 overlaps_a, overlaps_b,
2633 if (CF_NOT_KNOWN_P (*overlaps_a)
2634 || CF_NOT_KNOWN_P (*overlaps_b))
2635 dependence_stats.num_siv_unimplemented++;
2636 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2637 || CF_NO_DEPENDENCE_P (*overlaps_b))
2638 dependence_stats.num_siv_independent++;
2640 dependence_stats.num_siv_dependent++;
2643 goto siv_subscript_dontknow;
2648 siv_subscript_dontknow:;
2649 if (dump_file && (dump_flags & TDF_DETAILS))
2650 fprintf (dump_file, "siv test failed: unimplemented.\n");
2651 *overlaps_a = conflict_fn_not_known ();
2652 *overlaps_b = conflict_fn_not_known ();
2653 *last_conflicts = chrec_dont_know;
2654 dependence_stats.num_siv_unimplemented++;
2657 if (dump_file && (dump_flags & TDF_DETAILS))
2658 fprintf (dump_file, ")\n");
2661 /* Returns false if we can prove that the greatest common divisor of the steps
2662 of CHREC does not divide CST, false otherwise. */
2665 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2667 HOST_WIDE_INT cd = 0, val;
2670 if (!host_integerp (cst, 0))
2672 val = tree_low_cst (cst, 0);
2674 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2676 step = CHREC_RIGHT (chrec);
2677 if (!host_integerp (step, 0))
2679 cd = gcd (cd, tree_low_cst (step, 0));
2680 chrec = CHREC_LEFT (chrec);
2683 return val % cd == 0;
2686 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2687 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2688 functions that describe the relation between the elements accessed
2689 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2692 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2695 analyze_miv_subscript (tree chrec_a,
2697 conflict_function **overlaps_a,
2698 conflict_function **overlaps_b,
2699 tree *last_conflicts,
2700 struct loop *loop_nest)
2702 tree type, difference;
2704 dependence_stats.num_miv++;
2705 if (dump_file && (dump_flags & TDF_DETAILS))
2706 fprintf (dump_file, "(analyze_miv_subscript \n");
2708 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2709 chrec_a = chrec_convert (type, chrec_a, NULL);
2710 chrec_b = chrec_convert (type, chrec_b, NULL);
2711 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2713 if (eq_evolutions_p (chrec_a, chrec_b))
2715 /* Access functions are the same: all the elements are accessed
2716 in the same order. */
2717 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2718 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2719 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2720 dependence_stats.num_miv_dependent++;
2723 else if (evolution_function_is_constant_p (difference)
2724 /* For the moment, the following is verified:
2725 evolution_function_is_affine_multivariate_p (chrec_a,
2727 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2729 /* testsuite/.../ssa-chrec-33.c
2730 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2732 The difference is 1, and all the evolution steps are multiples
2733 of 2, consequently there are no overlapping elements. */
2734 *overlaps_a = conflict_fn_no_dependence ();
2735 *overlaps_b = conflict_fn_no_dependence ();
2736 *last_conflicts = integer_zero_node;
2737 dependence_stats.num_miv_independent++;
2740 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2741 && !chrec_contains_symbols (chrec_a)
2742 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2743 && !chrec_contains_symbols (chrec_b))
2745 /* testsuite/.../ssa-chrec-35.c
2746 {0, +, 1}_2 vs. {0, +, 1}_3
2747 the overlapping elements are respectively located at iterations:
2748 {0, +, 1}_x and {0, +, 1}_x,
2749 in other words, we have the equality:
2750 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2753 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2754 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2756 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2757 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2759 analyze_subscript_affine_affine (chrec_a, chrec_b,
2760 overlaps_a, overlaps_b, last_conflicts);
2762 if (CF_NOT_KNOWN_P (*overlaps_a)
2763 || CF_NOT_KNOWN_P (*overlaps_b))
2764 dependence_stats.num_miv_unimplemented++;
2765 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2766 || CF_NO_DEPENDENCE_P (*overlaps_b))
2767 dependence_stats.num_miv_independent++;
2769 dependence_stats.num_miv_dependent++;
2774 /* When the analysis is too difficult, answer "don't know". */
2775 if (dump_file && (dump_flags & TDF_DETAILS))
2776 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2778 *overlaps_a = conflict_fn_not_known ();
2779 *overlaps_b = conflict_fn_not_known ();
2780 *last_conflicts = chrec_dont_know;
2781 dependence_stats.num_miv_unimplemented++;
2784 if (dump_file && (dump_flags & TDF_DETAILS))
2785 fprintf (dump_file, ")\n");
2788 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2789 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2790 OVERLAP_ITERATIONS_B are initialized with two functions that
2791 describe the iterations that contain conflicting elements.
2793 Remark: For an integer k >= 0, the following equality is true:
2795 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2799 analyze_overlapping_iterations (tree chrec_a,
2801 conflict_function **overlap_iterations_a,
2802 conflict_function **overlap_iterations_b,
2803 tree *last_conflicts, struct loop *loop_nest)
2805 unsigned int lnn = loop_nest->num;
2807 dependence_stats.num_subscript_tests++;
2809 if (dump_file && (dump_flags & TDF_DETAILS))
2811 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2812 fprintf (dump_file, " (chrec_a = ");
2813 print_generic_expr (dump_file, chrec_a, 0);
2814 fprintf (dump_file, ")\n (chrec_b = ");
2815 print_generic_expr (dump_file, chrec_b, 0);
2816 fprintf (dump_file, ")\n");
2819 if (chrec_a == NULL_TREE
2820 || chrec_b == NULL_TREE
2821 || chrec_contains_undetermined (chrec_a)
2822 || chrec_contains_undetermined (chrec_b))
2824 dependence_stats.num_subscript_undetermined++;
2826 *overlap_iterations_a = conflict_fn_not_known ();
2827 *overlap_iterations_b = conflict_fn_not_known ();
2830 /* If they are the same chrec, and are affine, they overlap
2831 on every iteration. */
2832 else if (eq_evolutions_p (chrec_a, chrec_b)
2833 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2834 || operand_equal_p (chrec_a, chrec_b, 0)))
2836 dependence_stats.num_same_subscript_function++;
2837 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2838 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2839 *last_conflicts = chrec_dont_know;
2842 /* If they aren't the same, and aren't affine, we can't do anything
2844 else if ((chrec_contains_symbols (chrec_a)
2845 || chrec_contains_symbols (chrec_b))
2846 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2847 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2849 dependence_stats.num_subscript_undetermined++;
2850 *overlap_iterations_a = conflict_fn_not_known ();
2851 *overlap_iterations_b = conflict_fn_not_known ();
2854 else if (ziv_subscript_p (chrec_a, chrec_b))
2855 analyze_ziv_subscript (chrec_a, chrec_b,
2856 overlap_iterations_a, overlap_iterations_b,
2859 else if (siv_subscript_p (chrec_a, chrec_b))
2860 analyze_siv_subscript (chrec_a, chrec_b,
2861 overlap_iterations_a, overlap_iterations_b,
2862 last_conflicts, lnn);
2865 analyze_miv_subscript (chrec_a, chrec_b,
2866 overlap_iterations_a, overlap_iterations_b,
2867 last_conflicts, loop_nest);
2869 if (dump_file && (dump_flags & TDF_DETAILS))
2871 fprintf (dump_file, " (overlap_iterations_a = ");
2872 dump_conflict_function (dump_file, *overlap_iterations_a);
2873 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2874 dump_conflict_function (dump_file, *overlap_iterations_b);
2875 fprintf (dump_file, ")\n");
2876 fprintf (dump_file, ")\n");
2880 /* Helper function for uniquely inserting distance vectors. */
2883 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2888 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2889 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2892 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2895 /* Helper function for uniquely inserting direction vectors. */
2898 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2903 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2904 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2907 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2910 /* Add a distance of 1 on all the loops outer than INDEX. If we
2911 haven't yet determined a distance for this outer loop, push a new
2912 distance vector composed of the previous distance, and a distance
2913 of 1 for this outer loop. Example:
2921 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2922 save (0, 1), then we have to save (1, 0). */
2925 add_outer_distances (struct data_dependence_relation *ddr,
2926 lambda_vector dist_v, int index)
2928 /* For each outer loop where init_v is not set, the accesses are
2929 in dependence of distance 1 in the loop. */
2930 while (--index >= 0)
2932 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2933 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2935 save_dist_v (ddr, save_v);
2939 /* Return false when fail to represent the data dependence as a
2940 distance vector. INIT_B is set to true when a component has been
2941 added to the distance vector DIST_V. INDEX_CARRY is then set to
2942 the index in DIST_V that carries the dependence. */
2945 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2946 struct data_reference *ddr_a,
2947 struct data_reference *ddr_b,
2948 lambda_vector dist_v, bool *init_b,
2952 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2954 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2956 tree access_fn_a, access_fn_b;
2957 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2959 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2961 non_affine_dependence_relation (ddr);
2965 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2966 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2968 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2969 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2972 int var_a = CHREC_VARIABLE (access_fn_a);
2973 int var_b = CHREC_VARIABLE (access_fn_b);
2976 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2978 non_affine_dependence_relation (ddr);
2982 dist = int_cst_value (SUB_DISTANCE (subscript));
2983 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
2984 *index_carry = MIN (index, *index_carry);
2986 /* This is the subscript coupling test. If we have already
2987 recorded a distance for this loop (a distance coming from
2988 another subscript), it should be the same. For example,
2989 in the following code, there is no dependence:
2996 if (init_v[index] != 0 && dist_v[index] != dist)
2998 finalize_ddr_dependent (ddr, chrec_known);
3002 dist_v[index] = dist;
3006 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3008 /* This can be for example an affine vs. constant dependence
3009 (T[i] vs. T[3]) that is not an affine dependence and is
3010 not representable as a distance vector. */
3011 non_affine_dependence_relation (ddr);
3019 /* Return true when the DDR contains only constant access functions. */
3022 constant_access_functions (const struct data_dependence_relation *ddr)
3026 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3027 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3028 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3034 /* Helper function for the case where DDR_A and DDR_B are the same
3035 multivariate access function with a constant step. For an example
3039 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3042 tree c_1 = CHREC_LEFT (c_2);
3043 tree c_0 = CHREC_LEFT (c_1);
3044 lambda_vector dist_v;
3047 /* Polynomials with more than 2 variables are not handled yet. When
3048 the evolution steps are parameters, it is not possible to
3049 represent the dependence using classical distance vectors. */
3050 if (TREE_CODE (c_0) != INTEGER_CST
3051 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3052 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3054 DDR_AFFINE_P (ddr) = false;
3058 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3059 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3061 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3062 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3063 v1 = int_cst_value (CHREC_RIGHT (c_1));
3064 v2 = int_cst_value (CHREC_RIGHT (c_2));
3077 save_dist_v (ddr, dist_v);
3079 add_outer_distances (ddr, dist_v, x_1);
3082 /* Helper function for the case where DDR_A and DDR_B are the same
3083 access functions. */
3086 add_other_self_distances (struct data_dependence_relation *ddr)
3088 lambda_vector dist_v;
3090 int index_carry = DDR_NB_LOOPS (ddr);
3092 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3094 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3096 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3098 if (!evolution_function_is_univariate_p (access_fun))
3100 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3102 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3106 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3108 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3109 add_multivariate_self_dist (ddr, access_fun);
3111 /* The evolution step is not constant: it varies in
3112 the outer loop, so this cannot be represented by a
3113 distance vector. For example in pr34635.c the
3114 evolution is {0, +, {0, +, 4}_1}_2. */
3115 DDR_AFFINE_P (ddr) = false;
3120 index_carry = MIN (index_carry,
3121 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3122 DDR_LOOP_NEST (ddr)));
3126 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3127 add_outer_distances (ddr, dist_v, index_carry);
3131 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3133 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3135 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3136 save_dist_v (ddr, dist_v);
3139 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3140 is the case for example when access functions are the same and
3141 equal to a constant, as in:
3148 in which case the distance vectors are (0) and (1). */
3151 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3155 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3157 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3158 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3159 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3161 for (j = 0; j < ca->n; j++)
3162 if (affine_function_zero_p (ca->fns[j]))
3164 insert_innermost_unit_dist_vector (ddr);
3168 for (j = 0; j < cb->n; j++)
3169 if (affine_function_zero_p (cb->fns[j]))
3171 insert_innermost_unit_dist_vector (ddr);
3177 /* Compute the classic per loop distance vector. DDR is the data
3178 dependence relation to build a vector from. Return false when fail
3179 to represent the data dependence as a distance vector. */
3182 build_classic_dist_vector (struct data_dependence_relation *ddr,
3183 struct loop *loop_nest)
3185 bool init_b = false;
3186 int index_carry = DDR_NB_LOOPS (ddr);
3187 lambda_vector dist_v;
3189 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3192 if (same_access_functions (ddr))
3194 /* Save the 0 vector. */
3195 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3196 save_dist_v (ddr, dist_v);
3198 if (constant_access_functions (ddr))
3199 add_distance_for_zero_overlaps (ddr);
3201 if (DDR_NB_LOOPS (ddr) > 1)
3202 add_other_self_distances (ddr);
3207 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3208 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3209 dist_v, &init_b, &index_carry))
3212 /* Save the distance vector if we initialized one. */
3215 /* Verify a basic constraint: classic distance vectors should
3216 always be lexicographically positive.
3218 Data references are collected in the order of execution of
3219 the program, thus for the following loop
3221 | for (i = 1; i < 100; i++)
3222 | for (j = 1; j < 100; j++)
3224 | t = T[j+1][i-1]; // A
3225 | T[j][i] = t + 2; // B
3228 references are collected following the direction of the wind:
3229 A then B. The data dependence tests are performed also
3230 following this order, such that we're looking at the distance
3231 separating the elements accessed by A from the elements later
3232 accessed by B. But in this example, the distance returned by
3233 test_dep (A, B) is lexicographically negative (-1, 1), that
3234 means that the access A occurs later than B with respect to
3235 the outer loop, ie. we're actually looking upwind. In this
3236 case we solve test_dep (B, A) looking downwind to the
3237 lexicographically positive solution, that returns the
3238 distance vector (1, -1). */
3239 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3241 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3242 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3245 compute_subscript_distance (ddr);
3246 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3247 save_v, &init_b, &index_carry))
3249 save_dist_v (ddr, save_v);
3250 DDR_REVERSED_P (ddr) = true;
3252 /* In this case there is a dependence forward for all the
3255 | for (k = 1; k < 100; k++)
3256 | for (i = 1; i < 100; i++)
3257 | for (j = 1; j < 100; j++)
3259 | t = T[j+1][i-1]; // A
3260 | T[j][i] = t + 2; // B
3268 if (DDR_NB_LOOPS (ddr) > 1)
3270 add_outer_distances (ddr, save_v, index_carry);
3271 add_outer_distances (ddr, dist_v, index_carry);
3276 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3277 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3279 if (DDR_NB_LOOPS (ddr) > 1)
3281 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3283 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3284 DDR_A (ddr), loop_nest))
3286 compute_subscript_distance (ddr);
3287 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3288 opposite_v, &init_b,
3292 save_dist_v (ddr, save_v);
3293 add_outer_distances (ddr, dist_v, index_carry);
3294 add_outer_distances (ddr, opposite_v, index_carry);
3297 save_dist_v (ddr, save_v);
3302 /* There is a distance of 1 on all the outer loops: Example:
3303 there is a dependence of distance 1 on loop_1 for the array A.
3309 add_outer_distances (ddr, dist_v,
3310 lambda_vector_first_nz (dist_v,
3311 DDR_NB_LOOPS (ddr), 0));
3314 if (dump_file && (dump_flags & TDF_DETAILS))
3318 fprintf (dump_file, "(build_classic_dist_vector\n");
3319 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3321 fprintf (dump_file, " dist_vector = (");
3322 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3323 DDR_NB_LOOPS (ddr));
3324 fprintf (dump_file, " )\n");
3326 fprintf (dump_file, ")\n");
3332 /* Return the direction for a given distance.
3333 FIXME: Computing dir this way is suboptimal, since dir can catch
3334 cases that dist is unable to represent. */
3336 static inline enum data_dependence_direction
3337 dir_from_dist (int dist)
3340 return dir_positive;
3342 return dir_negative;
3347 /* Compute the classic per loop direction vector. DDR is the data
3348 dependence relation to build a vector from. */
3351 build_classic_dir_vector (struct data_dependence_relation *ddr)
3354 lambda_vector dist_v;
3356 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3358 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3360 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3361 dir_v[j] = dir_from_dist (dist_v[j]);
3363 save_dir_v (ddr, dir_v);
3367 /* Helper function. Returns true when there is a dependence between
3368 data references DRA and DRB. */
3371 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3372 struct data_reference *dra,
3373 struct data_reference *drb,
3374 struct loop *loop_nest)
3377 tree last_conflicts;
3378 struct subscript *subscript;
3380 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3383 conflict_function *overlaps_a, *overlaps_b;
3385 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3386 DR_ACCESS_FN (drb, i),
3387 &overlaps_a, &overlaps_b,
3388 &last_conflicts, loop_nest);
3390 if (CF_NOT_KNOWN_P (overlaps_a)
3391 || CF_NOT_KNOWN_P (overlaps_b))
3393 finalize_ddr_dependent (ddr, chrec_dont_know);
3394 dependence_stats.num_dependence_undetermined++;
3395 free_conflict_function (overlaps_a);
3396 free_conflict_function (overlaps_b);
3400 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3401 || CF_NO_DEPENDENCE_P (overlaps_b))
3403 finalize_ddr_dependent (ddr, chrec_known);
3404 dependence_stats.num_dependence_independent++;
3405 free_conflict_function (overlaps_a);
3406 free_conflict_function (overlaps_b);
3412 if (SUB_CONFLICTS_IN_A (subscript))
3413 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3414 if (SUB_CONFLICTS_IN_B (subscript))
3415 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3417 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3418 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3419 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3426 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3429 subscript_dependence_tester (struct data_dependence_relation *ddr,
3430 struct loop *loop_nest)
3433 if (dump_file && (dump_flags & TDF_DETAILS))
3434 fprintf (dump_file, "(subscript_dependence_tester \n");
3436 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3437 dependence_stats.num_dependence_dependent++;
3439 compute_subscript_distance (ddr);
3440 if (build_classic_dist_vector (ddr, loop_nest))
3441 build_classic_dir_vector (ddr);
3443 if (dump_file && (dump_flags & TDF_DETAILS))
3444 fprintf (dump_file, ")\n");
3447 /* Returns true when all the access functions of A are affine or
3448 constant with respect to LOOP_NEST. */
3451 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3452 const struct loop *loop_nest)
3455 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3458 FOR_EACH_VEC_ELT (tree, fns, i, t)
3459 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3460 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3466 /* Initializes an equation for an OMEGA problem using the information
3467 contained in the ACCESS_FUN. Returns true when the operation
3470 PB is the omega constraint system.
3471 EQ is the number of the equation to be initialized.
3472 OFFSET is used for shifting the variables names in the constraints:
3473 a constrain is composed of 2 * the number of variables surrounding
3474 dependence accesses. OFFSET is set either to 0 for the first n variables,
3475 then it is set to n.
3476 ACCESS_FUN is expected to be an affine chrec. */
3479 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3480 unsigned int offset, tree access_fun,
3481 struct data_dependence_relation *ddr)
3483 switch (TREE_CODE (access_fun))
3485 case POLYNOMIAL_CHREC:
3487 tree left = CHREC_LEFT (access_fun);
3488 tree right = CHREC_RIGHT (access_fun);
3489 int var = CHREC_VARIABLE (access_fun);
3492 if (TREE_CODE (right) != INTEGER_CST)
3495 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3496 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3498 /* Compute the innermost loop index. */
3499 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3502 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3503 += int_cst_value (right);
3505 switch (TREE_CODE (left))
3507 case POLYNOMIAL_CHREC:
3508 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3511 pb->eqs[eq].coef[0] += int_cst_value (left);
3520 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3528 /* As explained in the comments preceding init_omega_for_ddr, we have
3529 to set up a system for each loop level, setting outer loops
3530 variation to zero, and current loop variation to positive or zero.
3531 Save each lexico positive distance vector. */
3534 omega_extract_distance_vectors (omega_pb pb,
3535 struct data_dependence_relation *ddr)
3539 struct loop *loopi, *loopj;
3540 enum omega_result res;
3542 /* Set a new problem for each loop in the nest. The basis is the
3543 problem that we have initialized until now. On top of this we
3544 add new constraints. */
3545 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3546 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3549 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3550 DDR_NB_LOOPS (ddr));
3552 omega_copy_problem (copy, pb);
3554 /* For all the outer loops "loop_j", add "dj = 0". */
3556 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3558 eq = omega_add_zero_eq (copy, omega_black);
3559 copy->eqs[eq].coef[j + 1] = 1;
3562 /* For "loop_i", add "0 <= di". */
3563 geq = omega_add_zero_geq (copy, omega_black);
3564 copy->geqs[geq].coef[i + 1] = 1;
3566 /* Reduce the constraint system, and test that the current
3567 problem is feasible. */
3568 res = omega_simplify_problem (copy);
3569 if (res == omega_false
3570 || res == omega_unknown
3571 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3574 for (eq = 0; eq < copy->num_subs; eq++)
3575 if (copy->subs[eq].key == (int) i + 1)
3577 dist = copy->subs[eq].coef[0];
3583 /* Reinitialize problem... */
3584 omega_copy_problem (copy, pb);
3586 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3588 eq = omega_add_zero_eq (copy, omega_black);
3589 copy->eqs[eq].coef[j + 1] = 1;
3592 /* ..., but this time "di = 1". */
3593 eq = omega_add_zero_eq (copy, omega_black);
3594 copy->eqs[eq].coef[i + 1] = 1;
3595 copy->eqs[eq].coef[0] = -1;
3597 res = omega_simplify_problem (copy);
3598 if (res == omega_false
3599 || res == omega_unknown
3600 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3603 for (eq = 0; eq < copy->num_subs; eq++)
3604 if (copy->subs[eq].key == (int) i + 1)
3606 dist = copy->subs[eq].coef[0];
3612 /* Save the lexicographically positive distance vector. */
3615 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3616 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3620 for (eq = 0; eq < copy->num_subs; eq++)
3621 if (copy->subs[eq].key > 0)
3623 dist = copy->subs[eq].coef[0];
3624 dist_v[copy->subs[eq].key - 1] = dist;
3627 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3628 dir_v[j] = dir_from_dist (dist_v[j]);
3630 save_dist_v (ddr, dist_v);
3631 save_dir_v (ddr, dir_v);
3635 omega_free_problem (copy);
3639 /* This is called for each subscript of a tuple of data references:
3640 insert an equality for representing the conflicts. */
3643 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3644 struct data_dependence_relation *ddr,
3645 omega_pb pb, bool *maybe_dependent)
3648 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3649 TREE_TYPE (access_fun_b));
3650 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3651 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3652 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3655 /* When the fun_a - fun_b is not constant, the dependence is not
3656 captured by the classic distance vector representation. */
3657 if (TREE_CODE (difference) != INTEGER_CST)
3661 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3663 /* There is no dependence. */
3664 *maybe_dependent = false;
3668 minus_one = build_int_cst (type, -1);
3669 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3671 eq = omega_add_zero_eq (pb, omega_black);
3672 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3673 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3674 /* There is probably a dependence, but the system of
3675 constraints cannot be built: answer "don't know". */
3679 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3680 && !int_divides_p (lambda_vector_gcd
3681 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3682 2 * DDR_NB_LOOPS (ddr)),
3683 pb->eqs[eq].coef[0]))
3685 /* There is no dependence. */
3686 *maybe_dependent = false;
3693 /* Helper function, same as init_omega_for_ddr but specialized for
3694 data references A and B. */
3697 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3698 struct data_dependence_relation *ddr,
3699 omega_pb pb, bool *maybe_dependent)
3704 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3706 /* Insert an equality per subscript. */
3707 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3709 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3710 ddr, pb, maybe_dependent))
3712 else if (*maybe_dependent == false)
3714 /* There is no dependence. */
3715 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3720 /* Insert inequalities: constraints corresponding to the iteration
3721 domain, i.e. the loops surrounding the references "loop_x" and
3722 the distance variables "dx". The layout of the OMEGA
3723 representation is as follows:
3724 - coef[0] is the constant
3725 - coef[1..nb_loops] are the protected variables that will not be
3726 removed by the solver: the "dx"
3727 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3729 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3730 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3732 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3735 ineq = omega_add_zero_geq (pb, omega_black);
3736 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3738 /* 0 <= loop_x + dx */
3739 ineq = omega_add_zero_geq (pb, omega_black);
3740 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3741 pb->geqs[ineq].coef[i + 1] = 1;
3745 /* loop_x <= nb_iters */
3746 ineq = omega_add_zero_geq (pb, omega_black);
3747 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3748 pb->geqs[ineq].coef[0] = nbi;
3750 /* loop_x + dx <= nb_iters */
3751 ineq = omega_add_zero_geq (pb, omega_black);
3752 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3753 pb->geqs[ineq].coef[i + 1] = -1;
3754 pb->geqs[ineq].coef[0] = nbi;
3756 /* A step "dx" bigger than nb_iters is not feasible, so
3757 add "0 <= nb_iters + dx", */
3758 ineq = omega_add_zero_geq (pb, omega_black);
3759 pb->geqs[ineq].coef[i + 1] = 1;
3760 pb->geqs[ineq].coef[0] = nbi;
3761 /* and "dx <= nb_iters". */
3762 ineq = omega_add_zero_geq (pb, omega_black);
3763 pb->geqs[ineq].coef[i + 1] = -1;
3764 pb->geqs[ineq].coef[0] = nbi;
3768 omega_extract_distance_vectors (pb, ddr);
3773 /* Sets up the Omega dependence problem for the data dependence
3774 relation DDR. Returns false when the constraint system cannot be
3775 built, ie. when the test answers "don't know". Returns true
3776 otherwise, and when independence has been proved (using one of the
3777 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3778 set MAYBE_DEPENDENT to true.
3780 Example: for setting up the dependence system corresponding to the
3781 conflicting accesses
3786 | ... A[2*j, 2*(i + j)]
3790 the following constraints come from the iteration domain:
3797 where di, dj are the distance variables. The constraints
3798 representing the conflicting elements are:
3801 i + 1 = 2 * (i + di + j + dj)
3803 For asking that the resulting distance vector (di, dj) be
3804 lexicographically positive, we insert the constraint "di >= 0". If
3805 "di = 0" in the solution, we fix that component to zero, and we
3806 look at the inner loops: we set a new problem where all the outer
3807 loop distances are zero, and fix this inner component to be
3808 positive. When one of the components is positive, we save that
3809 distance, and set a new problem where the distance on this loop is
3810 zero, searching for other distances in the inner loops. Here is
3811 the classic example that illustrates that we have to set for each
3812 inner loop a new problem:
3820 we have to save two distances (1, 0) and (0, 1).
3822 Given two array references, refA and refB, we have to set the
3823 dependence problem twice, refA vs. refB and refB vs. refA, and we
3824 cannot do a single test, as refB might occur before refA in the
3825 inner loops, and the contrary when considering outer loops: ex.
3830 | T[{1,+,1}_2][{1,+,1}_1] // refA
3831 | T[{2,+,1}_2][{0,+,1}_1] // refB
3836 refB touches the elements in T before refA, and thus for the same
3837 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3838 but for successive loop_0 iterations, we have (1, -1, 1)
3840 The Omega solver expects the distance variables ("di" in the
3841 previous example) to come first in the constraint system (as
3842 variables to be protected, or "safe" variables), the constraint
3843 system is built using the following layout:
3845 "cst | distance vars | index vars".
3849 init_omega_for_ddr (struct data_dependence_relation *ddr,
3850 bool *maybe_dependent)
3855 *maybe_dependent = true;
3857 if (same_access_functions (ddr))
3860 lambda_vector dir_v;
3862 /* Save the 0 vector. */
3863 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3864 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3865 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3866 dir_v[j] = dir_equal;
3867 save_dir_v (ddr, dir_v);
3869 /* Save the dependences carried by outer loops. */
3870 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3871 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3873 omega_free_problem (pb);
3877 /* Omega expects the protected variables (those that have to be kept
3878 after elimination) to appear first in the constraint system.
3879 These variables are the distance variables. In the following
3880 initialization we declare NB_LOOPS safe variables, and the total
3881 number of variables for the constraint system is 2*NB_LOOPS. */
3882 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3883 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3885 omega_free_problem (pb);
3887 /* Stop computation if not decidable, or no dependence. */
3888 if (res == false || *maybe_dependent == false)
3891 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3892 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3894 omega_free_problem (pb);
3899 /* Return true when DDR contains the same information as that stored
3900 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3903 ddr_consistent_p (FILE *file,
3904 struct data_dependence_relation *ddr,
3905 VEC (lambda_vector, heap) *dist_vects,
3906 VEC (lambda_vector, heap) *dir_vects)
3910 /* If dump_file is set, output there. */
3911 if (dump_file && (dump_flags & TDF_DETAILS))
3914 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3916 lambda_vector b_dist_v;
3917 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3918 VEC_length (lambda_vector, dist_vects),
3919 DDR_NUM_DIST_VECTS (ddr));
3921 fprintf (file, "Banerjee dist vectors:\n");
3922 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3923 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3925 fprintf (file, "Omega dist vectors:\n");
3926 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3927 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3929 fprintf (file, "data dependence relation:\n");
3930 dump_data_dependence_relation (file, ddr);
3932 fprintf (file, ")\n");
3936 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3938 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3939 VEC_length (lambda_vector, dir_vects),
3940 DDR_NUM_DIR_VECTS (ddr));
3944 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3946 lambda_vector a_dist_v;
3947 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3949 /* Distance vectors are not ordered in the same way in the DDR
3950 and in the DIST_VECTS: search for a matching vector. */
3951 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3952 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3955 if (j == VEC_length (lambda_vector, dist_vects))
3957 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3958 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3959 fprintf (file, "not found in Omega dist vectors:\n");
3960 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3961 fprintf (file, "data dependence relation:\n");
3962 dump_data_dependence_relation (file, ddr);
3963 fprintf (file, ")\n");
3967 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3969 lambda_vector a_dir_v;
3970 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3972 /* Direction vectors are not ordered in the same way in the DDR
3973 and in the DIR_VECTS: search for a matching vector. */
3974 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3975 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3978 if (j == VEC_length (lambda_vector, dist_vects))
3980 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3981 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3982 fprintf (file, "not found in Omega dir vectors:\n");
3983 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3984 fprintf (file, "data dependence relation:\n");
3985 dump_data_dependence_relation (file, ddr);
3986 fprintf (file, ")\n");
3993 /* This computes the affine dependence relation between A and B with
3994 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3995 independence between two accesses, while CHREC_DONT_KNOW is used
3996 for representing the unknown relation.
3998 Note that it is possible to stop the computation of the dependence
3999 relation the first time we detect a CHREC_KNOWN element for a given
4003 compute_affine_dependence (struct data_dependence_relation *ddr,
4004 struct loop *loop_nest)
4006 struct data_reference *dra = DDR_A (ddr);
4007 struct data_reference *drb = DDR_B (ddr);
4009 if (dump_file && (dump_flags & TDF_DETAILS))
4011 fprintf (dump_file, "(compute_affine_dependence\n");
4012 fprintf (dump_file, " (stmt_a = \n");
4013 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
4014 fprintf (dump_file, ")\n (stmt_b = \n");
4015 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
4016 fprintf (dump_file, ")\n");
4019 /* Analyze only when the dependence relation is not yet known. */
4020 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
4021 && !DDR_SELF_REFERENCE (ddr))
4023 dependence_stats.num_dependence_tests++;
4025 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4026 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4028 if (flag_check_data_deps)
4030 /* Compute the dependences using the first algorithm. */
4031 subscript_dependence_tester (ddr, loop_nest);
4033 if (dump_file && (dump_flags & TDF_DETAILS))
4035 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4036 dump_data_dependence_relation (dump_file, ddr);
4039 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4041 bool maybe_dependent;
4042 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4044 /* Save the result of the first DD analyzer. */
4045 dist_vects = DDR_DIST_VECTS (ddr);
4046 dir_vects = DDR_DIR_VECTS (ddr);
4048 /* Reset the information. */
4049 DDR_DIST_VECTS (ddr) = NULL;
4050 DDR_DIR_VECTS (ddr) = NULL;
4052 /* Compute the same information using Omega. */
4053 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4054 goto csys_dont_know;
4056 if (dump_file && (dump_flags & TDF_DETAILS))
4058 fprintf (dump_file, "Omega Analyzer\n");
4059 dump_data_dependence_relation (dump_file, ddr);
4062 /* Check that we get the same information. */
4063 if (maybe_dependent)
4064 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4069 subscript_dependence_tester (ddr, loop_nest);
4072 /* As a last case, if the dependence cannot be determined, or if
4073 the dependence is considered too difficult to determine, answer
4078 dependence_stats.num_dependence_undetermined++;
4080 if (dump_file && (dump_flags & TDF_DETAILS))
4082 fprintf (dump_file, "Data ref a:\n");
4083 dump_data_reference (dump_file, dra);
4084 fprintf (dump_file, "Data ref b:\n");
4085 dump_data_reference (dump_file, drb);
4086 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4088 finalize_ddr_dependent (ddr, chrec_dont_know);
4092 if (dump_file && (dump_flags & TDF_DETAILS))
4093 fprintf (dump_file, ")\n");
4096 /* This computes the dependence relation for the same data
4097 reference into DDR. */
4100 compute_self_dependence (struct data_dependence_relation *ddr)
4103 struct subscript *subscript;
4105 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4108 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4111 if (SUB_CONFLICTS_IN_A (subscript))
4112 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4113 if (SUB_CONFLICTS_IN_B (subscript))
4114 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4116 /* The accessed index overlaps for each iteration. */
4117 SUB_CONFLICTS_IN_A (subscript)
4118 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4119 SUB_CONFLICTS_IN_B (subscript)
4120 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4121 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4124 /* The distance vector is the zero vector. */
4125 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4126 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4129 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4130 the data references in DATAREFS, in the LOOP_NEST. When
4131 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4135 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4136 VEC (ddr_p, heap) **dependence_relations,
4137 VEC (loop_p, heap) *loop_nest,
4138 bool compute_self_and_rr)
4140 struct data_dependence_relation *ddr;
4141 struct data_reference *a, *b;
4144 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4145 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4146 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4148 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4149 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4151 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4154 if (compute_self_and_rr)
4155 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4157 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4158 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4159 compute_self_dependence (ddr);
4163 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4164 true if STMT clobbers memory, false otherwise. */
4167 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4169 bool clobbers_memory = false;
4172 enum gimple_code stmt_code = gimple_code (stmt);
4176 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4177 Calls have side-effects, except those to const or pure
4179 if ((stmt_code == GIMPLE_CALL
4180 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4181 || (stmt_code == GIMPLE_ASM
4182 && gimple_asm_volatile_p (stmt)))
4183 clobbers_memory = true;
4185 if (!gimple_vuse (stmt))
4186 return clobbers_memory;
4188 if (stmt_code == GIMPLE_ASSIGN)
4191 op0 = gimple_assign_lhs_ptr (stmt);
4192 op1 = gimple_assign_rhs1_ptr (stmt);
4195 || (REFERENCE_CLASS_P (*op1)
4196 && (base = get_base_address (*op1))
4197 && TREE_CODE (base) != SSA_NAME))
4199 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4201 ref->is_read = true;
4204 else if (stmt_code == GIMPLE_CALL)
4208 op0 = gimple_call_lhs_ptr (stmt);
4209 n = gimple_call_num_args (stmt);
4210 for (i = 0; i < n; i++)
4212 op1 = gimple_call_arg_ptr (stmt, i);
4215 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4217 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4219 ref->is_read = true;
4224 return clobbers_memory;
4228 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4230 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4232 ref->is_read = false;
4234 return clobbers_memory;
4237 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4238 reference, returns false, otherwise returns true. NEST is the outermost
4239 loop of the loop nest in which the references should be analyzed. */
4242 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4243 VEC (data_reference_p, heap) **datarefs)
4246 VEC (data_ref_loc, heap) *references;
4249 data_reference_p dr;
4251 if (get_references_in_stmt (stmt, &references))
4253 VEC_free (data_ref_loc, heap, references);
4257 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4259 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4260 *ref->pos, stmt, ref->is_read);
4261 gcc_assert (dr != NULL);
4262 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4264 VEC_free (data_ref_loc, heap, references);
4268 /* Stores the data references in STMT to DATAREFS. If there is an
4269 unanalyzable reference, returns false, otherwise returns true.
4270 NEST is the outermost loop of the loop nest in which the references
4271 should be instantiated, LOOP is the loop in which the references
4272 should be analyzed. */
4275 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4276 VEC (data_reference_p, heap) **datarefs)
4279 VEC (data_ref_loc, heap) *references;
4282 data_reference_p dr;
4284 if (get_references_in_stmt (stmt, &references))
4286 VEC_free (data_ref_loc, heap, references);
4290 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4292 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4293 gcc_assert (dr != NULL);
4294 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4297 VEC_free (data_ref_loc, heap, references);
4301 /* Search the data references in LOOP, and record the information into
4302 DATAREFS. Returns chrec_dont_know when failing to analyze a
4303 difficult case, returns NULL_TREE otherwise. */
4306 find_data_references_in_bb (struct loop *loop, basic_block bb,
4307 VEC (data_reference_p, heap) **datarefs)
4309 gimple_stmt_iterator bsi;
4311 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4313 gimple stmt = gsi_stmt (bsi);
4315 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4317 struct data_reference *res;
4318 res = XCNEW (struct data_reference);
4319 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4321 return chrec_dont_know;
4328 /* Search the data references in LOOP, and record the information into
4329 DATAREFS. Returns chrec_dont_know when failing to analyze a
4330 difficult case, returns NULL_TREE otherwise.
4332 TODO: This function should be made smarter so that it can handle address
4333 arithmetic as if they were array accesses, etc. */
4336 find_data_references_in_loop (struct loop *loop,
4337 VEC (data_reference_p, heap) **datarefs)
4339 basic_block bb, *bbs;
4342 bbs = get_loop_body_in_dom_order (loop);
4344 for (i = 0; i < loop->num_nodes; i++)
4348 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4351 return chrec_dont_know;
4359 /* Recursive helper function. */
4362 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4364 /* Inner loops of the nest should not contain siblings. Example:
4365 when there are two consecutive loops,
4376 the dependence relation cannot be captured by the distance
4381 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4383 return find_loop_nest_1 (loop->inner, loop_nest);
4387 /* Return false when the LOOP is not well nested. Otherwise return
4388 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4389 contain the loops from the outermost to the innermost, as they will
4390 appear in the classic distance vector. */
4393 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4395 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4397 return find_loop_nest_1 (loop->inner, loop_nest);
4401 /* Returns true when the data dependences have been computed, false otherwise.
4402 Given a loop nest LOOP, the following vectors are returned:
4403 DATAREFS is initialized to all the array elements contained in this loop,
4404 DEPENDENCE_RELATIONS contains the relations between the data references.
4405 Compute read-read and self relations if
4406 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4409 compute_data_dependences_for_loop (struct loop *loop,
4410 bool compute_self_and_read_read_dependences,
4411 VEC (loop_p, heap) **loop_nest,
4412 VEC (data_reference_p, heap) **datarefs,
4413 VEC (ddr_p, heap) **dependence_relations)
4417 memset (&dependence_stats, 0, sizeof (dependence_stats));
4419 /* If the loop nest is not well formed, or one of the data references
4420 is not computable, give up without spending time to compute other
4423 || !find_loop_nest (loop, loop_nest)
4424 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4426 struct data_dependence_relation *ddr;
4428 /* Insert a single relation into dependence_relations:
4430 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4431 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4435 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4436 compute_self_and_read_read_dependences);
4438 if (dump_file && (dump_flags & TDF_STATS))
4440 fprintf (dump_file, "Dependence tester statistics:\n");
4442 fprintf (dump_file, "Number of dependence tests: %d\n",
4443 dependence_stats.num_dependence_tests);
4444 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4445 dependence_stats.num_dependence_dependent);
4446 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4447 dependence_stats.num_dependence_independent);
4448 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4449 dependence_stats.num_dependence_undetermined);
4451 fprintf (dump_file, "Number of subscript tests: %d\n",
4452 dependence_stats.num_subscript_tests);
4453 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4454 dependence_stats.num_subscript_undetermined);
4455 fprintf (dump_file, "Number of same subscript function: %d\n",
4456 dependence_stats.num_same_subscript_function);
4458 fprintf (dump_file, "Number of ziv tests: %d\n",
4459 dependence_stats.num_ziv);
4460 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4461 dependence_stats.num_ziv_dependent);
4462 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4463 dependence_stats.num_ziv_independent);
4464 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4465 dependence_stats.num_ziv_unimplemented);
4467 fprintf (dump_file, "Number of siv tests: %d\n",
4468 dependence_stats.num_siv);
4469 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4470 dependence_stats.num_siv_dependent);
4471 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4472 dependence_stats.num_siv_independent);
4473 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4474 dependence_stats.num_siv_unimplemented);
4476 fprintf (dump_file, "Number of miv tests: %d\n",
4477 dependence_stats.num_miv);
4478 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4479 dependence_stats.num_miv_dependent);
4480 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4481 dependence_stats.num_miv_independent);
4482 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4483 dependence_stats.num_miv_unimplemented);
4489 /* Returns true when the data dependences for the basic block BB have been
4490 computed, false otherwise.
4491 DATAREFS is initialized to all the array elements contained in this basic
4492 block, DEPENDENCE_RELATIONS contains the relations between the data
4493 references. Compute read-read and self relations if
4494 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4496 compute_data_dependences_for_bb (basic_block bb,
4497 bool compute_self_and_read_read_dependences,
4498 VEC (data_reference_p, heap) **datarefs,
4499 VEC (ddr_p, heap) **dependence_relations)
4501 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4504 compute_all_dependences (*datarefs, dependence_relations, NULL,
4505 compute_self_and_read_read_dependences);
4509 /* Entry point (for testing only). Analyze all the data references
4510 and the dependence relations in LOOP.
4512 The data references are computed first.
4514 A relation on these nodes is represented by a complete graph. Some
4515 of the relations could be of no interest, thus the relations can be
4518 In the following function we compute all the relations. This is
4519 just a first implementation that is here for:
4520 - for showing how to ask for the dependence relations,
4521 - for the debugging the whole dependence graph,
4522 - for the dejagnu testcases and maintenance.
4524 It is possible to ask only for a part of the graph, avoiding to
4525 compute the whole dependence graph. The computed dependences are
4526 stored in a knowledge base (KB) such that later queries don't
4527 recompute the same information. The implementation of this KB is
4528 transparent to the optimizer, and thus the KB can be changed with a
4529 more efficient implementation, or the KB could be disabled. */
4531 analyze_all_data_dependences (struct loop *loop)
4534 int nb_data_refs = 10;
4535 VEC (data_reference_p, heap) *datarefs =
4536 VEC_alloc (data_reference_p, heap, nb_data_refs);
4537 VEC (ddr_p, heap) *dependence_relations =
4538 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4539 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4541 /* Compute DDs on the whole function. */
4542 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4543 &dependence_relations);
4547 dump_data_dependence_relations (dump_file, dependence_relations);
4548 fprintf (dump_file, "\n\n");
4550 if (dump_flags & TDF_DETAILS)
4551 dump_dist_dir_vectors (dump_file, dependence_relations);
4553 if (dump_flags & TDF_STATS)
4555 unsigned nb_top_relations = 0;
4556 unsigned nb_bot_relations = 0;
4557 unsigned nb_chrec_relations = 0;
4558 struct data_dependence_relation *ddr;
4560 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4562 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4565 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4569 nb_chrec_relations++;
4572 gather_stats_on_scev_database ();
4576 VEC_free (loop_p, heap, loop_nest);
4577 free_dependence_relations (dependence_relations);
4578 free_data_refs (datarefs);
4581 /* Computes all the data dependences and check that the results of
4582 several analyzers are the same. */
4585 tree_check_data_deps (void)
4588 struct loop *loop_nest;
4590 FOR_EACH_LOOP (li, loop_nest, 0)
4591 analyze_all_data_dependences (loop_nest);
4594 /* Free the memory used by a data dependence relation DDR. */
4597 free_dependence_relation (struct data_dependence_relation *ddr)
4602 if (DDR_SUBSCRIPTS (ddr))
4603 free_subscripts (DDR_SUBSCRIPTS (ddr));
4604 if (DDR_DIST_VECTS (ddr))
4605 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4606 if (DDR_DIR_VECTS (ddr))
4607 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4612 /* Free the memory used by the data dependence relations from
4613 DEPENDENCE_RELATIONS. */
4616 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4619 struct data_dependence_relation *ddr;
4621 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4623 free_dependence_relation (ddr);
4625 VEC_free (ddr_p, heap, dependence_relations);
4628 /* Free the memory used by the data references from DATAREFS. */
4631 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4634 struct data_reference *dr;
4636 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4638 VEC_free (data_reference_p, heap, datarefs);
4643 /* Dump vertex I in RDG to FILE. */
4646 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4648 struct vertex *v = &(rdg->vertices[i]);
4649 struct graph_edge *e;
4651 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4652 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4653 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4656 for (e = v->pred; e; e = e->pred_next)
4657 fprintf (file, " %d", e->src);
4659 fprintf (file, ") (out:");
4662 for (e = v->succ; e; e = e->succ_next)
4663 fprintf (file, " %d", e->dest);
4665 fprintf (file, ")\n");
4666 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4667 fprintf (file, ")\n");
4670 /* Call dump_rdg_vertex on stderr. */
4673 debug_rdg_vertex (struct graph *rdg, int i)
4675 dump_rdg_vertex (stderr, rdg, i);
4678 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4679 dumped vertices to that bitmap. */
4681 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4685 fprintf (file, "(%d\n", c);
4687 for (i = 0; i < rdg->n_vertices; i++)
4688 if (rdg->vertices[i].component == c)
4691 bitmap_set_bit (dumped, i);
4693 dump_rdg_vertex (file, rdg, i);
4696 fprintf (file, ")\n");
4699 /* Call dump_rdg_vertex on stderr. */
4702 debug_rdg_component (struct graph *rdg, int c)
4704 dump_rdg_component (stderr, rdg, c, NULL);
4707 /* Dump the reduced dependence graph RDG to FILE. */
4710 dump_rdg (FILE *file, struct graph *rdg)
4713 bitmap dumped = BITMAP_ALLOC (NULL);
4715 fprintf (file, "(rdg\n");
4717 for (i = 0; i < rdg->n_vertices; i++)
4718 if (!bitmap_bit_p (dumped, i))
4719 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4721 fprintf (file, ")\n");
4722 BITMAP_FREE (dumped);
4725 /* Call dump_rdg on stderr. */
4728 debug_rdg (struct graph *rdg)
4730 dump_rdg (stderr, rdg);
4734 dot_rdg_1 (FILE *file, struct graph *rdg)
4738 fprintf (file, "digraph RDG {\n");
4740 for (i = 0; i < rdg->n_vertices; i++)
4742 struct vertex *v = &(rdg->vertices[i]);
4743 struct graph_edge *e;
4745 /* Highlight reads from memory. */
4746 if (RDG_MEM_READS_STMT (rdg, i))
4747 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4749 /* Highlight stores to memory. */
4750 if (RDG_MEM_WRITE_STMT (rdg, i))
4751 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4754 for (e = v->succ; e; e = e->succ_next)
4755 switch (RDGE_TYPE (e))
4758 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4762 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4766 /* These are the most common dependences: don't print these. */
4767 fprintf (file, "%d -> %d \n", i, e->dest);
4771 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4779 fprintf (file, "}\n\n");
4782 /* Display the Reduced Dependence Graph using dotty. */
4783 extern void dot_rdg (struct graph *);
4786 dot_rdg (struct graph *rdg)
4788 /* When debugging, enable the following code. This cannot be used
4789 in production compilers because it calls "system". */
4791 FILE *file = fopen ("/tmp/rdg.dot", "w");
4792 gcc_assert (file != NULL);
4794 dot_rdg_1 (file, rdg);
4797 system ("dotty /tmp/rdg.dot &");
4799 dot_rdg_1 (stderr, rdg);
4803 /* This structure is used for recording the mapping statement index in
4806 struct GTY(()) rdg_vertex_info
4812 /* Returns the index of STMT in RDG. */
4815 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4817 struct rdg_vertex_info rvi, *slot;
4820 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4828 /* Creates an edge in RDG for each distance vector from DDR. The
4829 order that we keep track of in the RDG is the order in which
4830 statements have to be executed. */
4833 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4835 struct graph_edge *e;
4837 data_reference_p dra = DDR_A (ddr);
4838 data_reference_p drb = DDR_B (ddr);
4839 unsigned level = ddr_dependence_level (ddr);
4841 /* For non scalar dependences, when the dependence is REVERSED,
4842 statement B has to be executed before statement A. */
4844 && !DDR_REVERSED_P (ddr))
4846 data_reference_p tmp = dra;
4851 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4852 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4854 if (va < 0 || vb < 0)
4857 e = add_edge (rdg, va, vb);
4858 e->data = XNEW (struct rdg_edge);
4860 RDGE_LEVEL (e) = level;
4861 RDGE_RELATION (e) = ddr;
4863 /* Determines the type of the data dependence. */
4864 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4865 RDGE_TYPE (e) = input_dd;
4866 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4867 RDGE_TYPE (e) = output_dd;
4868 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4869 RDGE_TYPE (e) = flow_dd;
4870 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4871 RDGE_TYPE (e) = anti_dd;
4874 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4875 the index of DEF in RDG. */
4878 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4880 use_operand_p imm_use_p;
4881 imm_use_iterator iterator;
4883 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4885 struct graph_edge *e;
4886 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4891 e = add_edge (rdg, idef, use);
4892 e->data = XNEW (struct rdg_edge);
4893 RDGE_TYPE (e) = flow_dd;
4894 RDGE_RELATION (e) = NULL;
4898 /* Creates the edges of the reduced dependence graph RDG. */
4901 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4904 struct data_dependence_relation *ddr;
4905 def_operand_p def_p;
4908 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4909 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4910 create_rdg_edge_for_ddr (rdg, ddr);
4912 for (i = 0; i < rdg->n_vertices; i++)
4913 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4915 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4918 /* Build the vertices of the reduced dependence graph RDG. */
4921 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4926 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4928 VEC (data_ref_loc, heap) *references;
4930 struct vertex *v = &(rdg->vertices[i]);
4931 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4932 struct rdg_vertex_info **slot;
4936 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4943 v->data = XNEW (struct rdg_vertex);
4944 RDG_STMT (rdg, i) = stmt;
4946 RDG_MEM_WRITE_STMT (rdg, i) = false;
4947 RDG_MEM_READS_STMT (rdg, i) = false;
4948 if (gimple_code (stmt) == GIMPLE_PHI)
4951 get_references_in_stmt (stmt, &references);
4952 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4954 RDG_MEM_WRITE_STMT (rdg, i) = true;
4956 RDG_MEM_READS_STMT (rdg, i) = true;
4958 VEC_free (data_ref_loc, heap, references);
4962 /* Initialize STMTS with all the statements of LOOP. When
4963 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4964 which we discover statements is important as
4965 generate_loops_for_partition is using the same traversal for
4966 identifying statements. */
4969 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4972 basic_block *bbs = get_loop_body_in_dom_order (loop);
4974 for (i = 0; i < loop->num_nodes; i++)
4976 basic_block bb = bbs[i];
4977 gimple_stmt_iterator bsi;
4980 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4981 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4983 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4985 stmt = gsi_stmt (bsi);
4986 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
4987 VEC_safe_push (gimple, heap, *stmts, stmt);
4994 /* Returns true when all the dependences are computable. */
4997 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5002 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5003 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5009 /* Computes a hash function for element ELT. */
5012 hash_stmt_vertex_info (const void *elt)
5014 const struct rdg_vertex_info *const rvi =
5015 (const struct rdg_vertex_info *) elt;
5016 gimple stmt = rvi->stmt;
5018 return htab_hash_pointer (stmt);
5021 /* Compares database elements E1 and E2. */
5024 eq_stmt_vertex_info (const void *e1, const void *e2)
5026 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5027 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5029 return elt1->stmt == elt2->stmt;
5032 /* Free the element E. */
5035 hash_stmt_vertex_del (void *e)
5040 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5041 statement of the loop nest, and one edge per data dependence or
5042 scalar dependence. */
5045 build_empty_rdg (int n_stmts)
5047 int nb_data_refs = 10;
5048 struct graph *rdg = new_graph (n_stmts);
5050 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5051 eq_stmt_vertex_info, hash_stmt_vertex_del);
5055 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5056 statement of the loop nest, and one edge per data dependence or
5057 scalar dependence. */
5060 build_rdg (struct loop *loop,
5061 VEC (loop_p, heap) **loop_nest,
5062 VEC (ddr_p, heap) **dependence_relations,
5063 VEC (data_reference_p, heap) **datarefs)
5065 struct graph *rdg = NULL;
5066 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5068 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5069 dependence_relations);
5071 if (known_dependences_p (*dependence_relations))
5073 stmts_from_loop (loop, &stmts);
5074 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5075 create_rdg_vertices (rdg, stmts);
5076 create_rdg_edges (rdg, *dependence_relations);
5079 VEC_free (gimple, heap, stmts);
5083 /* Free the reduced dependence graph RDG. */
5086 free_rdg (struct graph *rdg)
5090 for (i = 0; i < rdg->n_vertices; i++)
5092 struct vertex *v = &(rdg->vertices[i]);
5093 struct graph_edge *e;
5095 for (e = v->succ; e; e = e->succ_next)
5101 htab_delete (rdg->indices);
5105 /* Initialize STMTS with all the statements of LOOP that contain a
5109 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5112 basic_block *bbs = get_loop_body_in_dom_order (loop);
5114 for (i = 0; i < loop->num_nodes; i++)
5116 basic_block bb = bbs[i];
5117 gimple_stmt_iterator bsi;
5119 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5120 if (gimple_vdef (gsi_stmt (bsi)))
5121 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5127 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5128 that contains a data reference on its LHS with a stride of the same
5129 size as its unit type. */
5132 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5136 struct data_reference *dr;
5139 || !gimple_vdef (stmt)
5140 || !is_gimple_assign (stmt)
5141 || !gimple_assign_single_p (stmt)
5142 || !(op1 = gimple_assign_rhs1 (stmt))
5143 || !(integer_zerop (op1) || real_zerop (op1)))
5146 dr = XCNEW (struct data_reference);
5147 op0 = gimple_assign_lhs (stmt);
5149 DR_STMT (dr) = stmt;
5152 res = dr_analyze_innermost (dr)
5153 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5159 /* Initialize STMTS with all the statements of LOOP that contain a
5160 store to memory of the form "A[i] = 0". */
5163 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5167 gimple_stmt_iterator si;
5169 basic_block *bbs = get_loop_body_in_dom_order (loop);
5171 for (i = 0; i < loop->num_nodes; i++)
5172 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5173 if ((stmt = gsi_stmt (si))
5174 && stmt_with_adjacent_zero_store_dr_p (stmt))
5175 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5180 /* For a data reference REF, return the declaration of its base
5181 address or NULL_TREE if the base is not determined. */
5184 ref_base_address (gimple stmt, data_ref_loc *ref)
5186 tree base = NULL_TREE;
5188 struct data_reference *dr = XCNEW (struct data_reference);
5190 DR_STMT (dr) = stmt;
5191 DR_REF (dr) = *ref->pos;
5192 dr_analyze_innermost (dr);
5193 base_address = DR_BASE_ADDRESS (dr);
5198 switch (TREE_CODE (base_address))
5201 base = TREE_OPERAND (base_address, 0);
5205 base = base_address;
5214 /* Determines whether the statement from vertex V of the RDG has a
5215 definition used outside the loop that contains this statement. */
5218 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5220 gimple stmt = RDG_STMT (rdg, v);
5221 struct loop *loop = loop_containing_stmt (stmt);
5222 use_operand_p imm_use_p;
5223 imm_use_iterator iterator;
5225 def_operand_p def_p;
5230 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5232 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5234 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5242 /* Determines whether statements S1 and S2 access to similar memory
5243 locations. Two memory accesses are considered similar when they
5244 have the same base address declaration, i.e. when their
5245 ref_base_address is the same. */
5248 have_similar_memory_accesses (gimple s1, gimple s2)
5252 VEC (data_ref_loc, heap) *refs1, *refs2;
5253 data_ref_loc *ref1, *ref2;
5255 get_references_in_stmt (s1, &refs1);
5256 get_references_in_stmt (s2, &refs2);
5258 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5260 tree base1 = ref_base_address (s1, ref1);
5263 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5264 if (base1 == ref_base_address (s2, ref2))
5272 VEC_free (data_ref_loc, heap, refs1);
5273 VEC_free (data_ref_loc, heap, refs2);
5277 /* Helper function for the hashtab. */
5280 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5282 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5283 CONST_CAST_GIMPLE ((const_gimple) s2));
5286 /* Helper function for the hashtab. */
5289 ref_base_address_1 (const void *s)
5291 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5293 VEC (data_ref_loc, heap) *refs;
5297 get_references_in_stmt (stmt, &refs);
5299 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5302 res = htab_hash_pointer (ref_base_address (stmt, ref));
5306 VEC_free (data_ref_loc, heap, refs);
5310 /* Try to remove duplicated write data references from STMTS. */
5313 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5317 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5318 have_similar_memory_accesses_1, NULL);
5320 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5324 slot = htab_find_slot (seen, stmt, INSERT);
5327 VEC_ordered_remove (gimple, *stmts, i);
5330 *slot = (void *) stmt;
5338 /* Returns the index of PARAMETER in the parameters vector of the
5339 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5342 access_matrix_get_index_for_parameter (tree parameter,
5343 struct access_matrix *access_matrix)
5346 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5347 tree lambda_parameter;
5349 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5350 if (lambda_parameter == parameter)
5351 return i + AM_NB_INDUCTION_VARS (access_matrix);