1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
50 - to define an interface to access this data.
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
63 has an integer solution x = 1 and y = -1.
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
87 #include "tree-affine.h"
89 static struct datadep_stats
91 int num_dependence_tests;
92 int num_dependence_dependent;
93 int num_dependence_independent;
94 int num_dependence_undetermined;
96 int num_subscript_tests;
97 int num_subscript_undetermined;
98 int num_same_subscript_function;
101 int num_ziv_independent;
102 int num_ziv_dependent;
103 int num_ziv_unimplemented;
106 int num_siv_independent;
107 int num_siv_dependent;
108 int num_siv_unimplemented;
111 int num_miv_independent;
112 int num_miv_dependent;
113 int num_miv_unimplemented;
116 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
117 struct data_reference *,
118 struct data_reference *,
120 /* Returns true iff A divides B. */
123 tree_fold_divides_p (const_tree a, const_tree b)
125 gcc_assert (TREE_CODE (a) == INTEGER_CST);
126 gcc_assert (TREE_CODE (b) == INTEGER_CST);
127 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
130 /* Returns true iff A divides B. */
133 int_divides_p (int a, int b)
135 return ((b % a) == 0);
140 /* Dump into FILE all the data references from DATAREFS. */
143 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
146 struct data_reference *dr;
148 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
149 dump_data_reference (file, dr);
152 /* Dump into STDERR all the data references from DATAREFS. */
155 debug_data_references (VEC (data_reference_p, heap) *datarefs)
157 dump_data_references (stderr, datarefs);
160 /* Dump to STDERR all the dependence relations from DDRS. */
163 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
165 dump_data_dependence_relations (stderr, ddrs);
168 /* Dump into FILE all the dependence relations from DDRS. */
171 dump_data_dependence_relations (FILE *file,
172 VEC (ddr_p, heap) *ddrs)
175 struct data_dependence_relation *ddr;
177 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
178 dump_data_dependence_relation (file, ddr);
181 /* Print to STDERR the data_reference DR. */
184 debug_data_reference (struct data_reference *dr)
186 dump_data_reference (stderr, dr);
189 /* Dump function for a DATA_REFERENCE structure. */
192 dump_data_reference (FILE *outf,
193 struct data_reference *dr)
197 fprintf (outf, "#(Data Ref: \n");
198 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
199 fprintf (outf, "# stmt: ");
200 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
201 fprintf (outf, "# ref: ");
202 print_generic_stmt (outf, DR_REF (dr), 0);
203 fprintf (outf, "# base_object: ");
204 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
206 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
208 fprintf (outf, "# Access function %d: ", i);
209 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
211 fprintf (outf, "#)\n");
214 /* Dumps the affine function described by FN to the file OUTF. */
217 dump_affine_function (FILE *outf, affine_fn fn)
222 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
223 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
225 fprintf (outf, " + ");
226 print_generic_expr (outf, coef, TDF_SLIM);
227 fprintf (outf, " * x_%u", i);
231 /* Dumps the conflict function CF to the file OUTF. */
234 dump_conflict_function (FILE *outf, conflict_function *cf)
238 if (cf->n == NO_DEPENDENCE)
239 fprintf (outf, "no dependence\n");
240 else if (cf->n == NOT_KNOWN)
241 fprintf (outf, "not known\n");
244 for (i = 0; i < cf->n; i++)
247 dump_affine_function (outf, cf->fns[i]);
248 fprintf (outf, "]\n");
253 /* Dump function for a SUBSCRIPT structure. */
256 dump_subscript (FILE *outf, struct subscript *subscript)
258 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
260 fprintf (outf, "\n (subscript \n");
261 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
262 dump_conflict_function (outf, cf);
263 if (CF_NONTRIVIAL_P (cf))
265 tree last_iteration = SUB_LAST_CONFLICT (subscript);
266 fprintf (outf, " last_conflict: ");
267 print_generic_stmt (outf, last_iteration, 0);
270 cf = SUB_CONFLICTS_IN_B (subscript);
271 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
272 dump_conflict_function (outf, cf);
273 if (CF_NONTRIVIAL_P (cf))
275 tree last_iteration = SUB_LAST_CONFLICT (subscript);
276 fprintf (outf, " last_conflict: ");
277 print_generic_stmt (outf, last_iteration, 0);
280 fprintf (outf, " (Subscript distance: ");
281 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
282 fprintf (outf, " )\n");
283 fprintf (outf, " )\n");
286 /* Print the classic direction vector DIRV to OUTF. */
289 print_direction_vector (FILE *outf,
295 for (eq = 0; eq < length; eq++)
297 enum data_dependence_direction dir = ((enum data_dependence_direction)
303 fprintf (outf, " +");
306 fprintf (outf, " -");
309 fprintf (outf, " =");
311 case dir_positive_or_equal:
312 fprintf (outf, " +=");
314 case dir_positive_or_negative:
315 fprintf (outf, " +-");
317 case dir_negative_or_equal:
318 fprintf (outf, " -=");
321 fprintf (outf, " *");
324 fprintf (outf, "indep");
328 fprintf (outf, "\n");
331 /* Print a vector of direction vectors. */
334 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
340 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
341 print_direction_vector (outf, v, length);
344 /* Print out a vector VEC of length N to OUTFILE. */
347 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
351 for (i = 0; i < n; i++)
352 fprintf (outfile, "%3d ", vector[i]);
353 fprintf (outfile, "\n");
356 /* Print a vector of distance vectors. */
359 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
365 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
366 print_lambda_vector (outf, v, length);
372 debug_data_dependence_relation (struct data_dependence_relation *ddr)
374 dump_data_dependence_relation (stderr, ddr);
377 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
380 dump_data_dependence_relation (FILE *outf,
381 struct data_dependence_relation *ddr)
383 struct data_reference *dra, *drb;
385 fprintf (outf, "(Data Dep: \n");
387 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
394 dump_data_reference (outf, dra);
396 fprintf (outf, " (nil)\n");
398 dump_data_reference (outf, drb);
400 fprintf (outf, " (nil)\n");
402 fprintf (outf, " (don't know)\n)\n");
408 dump_data_reference (outf, dra);
409 dump_data_reference (outf, drb);
411 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
412 fprintf (outf, " (no dependence)\n");
414 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
419 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
421 fprintf (outf, " access_fn_A: ");
422 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
423 fprintf (outf, " access_fn_B: ");
424 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
425 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
428 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
429 fprintf (outf, " loop nest: (");
430 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
431 fprintf (outf, "%d ", loopi->num);
432 fprintf (outf, ")\n");
434 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
436 fprintf (outf, " distance_vector: ");
437 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
441 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
443 fprintf (outf, " direction_vector: ");
444 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
449 fprintf (outf, ")\n");
452 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
455 dump_data_dependence_direction (FILE *file,
456 enum data_dependence_direction dir)
472 case dir_positive_or_negative:
473 fprintf (file, "+-");
476 case dir_positive_or_equal:
477 fprintf (file, "+=");
480 case dir_negative_or_equal:
481 fprintf (file, "-=");
493 /* Dumps the distance and direction vectors in FILE. DDRS contains
494 the dependence relations, and VECT_SIZE is the size of the
495 dependence vectors, or in other words the number of loops in the
499 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
502 struct data_dependence_relation *ddr;
505 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
506 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
508 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
510 fprintf (file, "DISTANCE_V (");
511 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
512 fprintf (file, ")\n");
515 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
517 fprintf (file, "DIRECTION_V (");
518 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
519 fprintf (file, ")\n");
523 fprintf (file, "\n\n");
526 /* Dumps the data dependence relations DDRS in FILE. */
529 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
532 struct data_dependence_relation *ddr;
534 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
535 dump_data_dependence_relation (file, ddr);
537 fprintf (file, "\n\n");
540 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
541 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
542 constant of type ssizetype, and returns true. If we cannot do this
543 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
547 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
548 tree *var, tree *off)
552 enum tree_code ocode = code;
560 *var = build_int_cst (type, 0);
561 *off = fold_convert (ssizetype, op0);
564 case POINTER_PLUS_EXPR:
569 split_constant_offset (op0, &var0, &off0);
570 split_constant_offset (op1, &var1, &off1);
571 *var = fold_build2 (code, type, var0, var1);
572 *off = size_binop (ocode, off0, off1);
576 if (TREE_CODE (op1) != INTEGER_CST)
579 split_constant_offset (op0, &var0, &off0);
580 *var = fold_build2 (MULT_EXPR, type, var0, op1);
581 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
587 HOST_WIDE_INT pbitsize, pbitpos;
588 enum machine_mode pmode;
589 int punsignedp, pvolatilep;
591 op0 = TREE_OPERAND (op0, 0);
592 if (!handled_component_p (op0))
595 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
596 &pmode, &punsignedp, &pvolatilep, false);
598 if (pbitpos % BITS_PER_UNIT != 0)
600 base = build_fold_addr_expr (base);
601 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
605 split_constant_offset (poffset, &poffset, &off1);
606 off0 = size_binop (PLUS_EXPR, off0, off1);
607 if (POINTER_TYPE_P (TREE_TYPE (base)))
608 base = fold_build_pointer_plus (base, poffset);
610 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
611 fold_convert (TREE_TYPE (base), poffset));
614 var0 = fold_convert (type, base);
616 /* If variable length types are involved, punt, otherwise casts
617 might be converted into ARRAY_REFs in gimplify_conversion.
618 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
619 possibly no longer appears in current GIMPLE, might resurface.
620 This perhaps could run
621 if (CONVERT_EXPR_P (var0))
623 gimplify_conversion (&var0);
624 // Attempt to fill in any within var0 found ARRAY_REF's
625 // element size from corresponding op embedded ARRAY_REF,
626 // if unsuccessful, just punt.
628 while (POINTER_TYPE_P (type))
629 type = TREE_TYPE (type);
630 if (int_size_in_bytes (type) < 0)
640 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
641 enum tree_code subcode;
643 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
646 var0 = gimple_assign_rhs1 (def_stmt);
647 subcode = gimple_assign_rhs_code (def_stmt);
648 var1 = gimple_assign_rhs2 (def_stmt);
650 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
654 /* We must not introduce undefined overflow, and we must not change the value.
655 Hence we're okay if the inner type doesn't overflow to start with
656 (pointer or signed), the outer type also is an integer or pointer
657 and the outer precision is at least as large as the inner. */
658 tree itype = TREE_TYPE (op0);
659 if ((POINTER_TYPE_P (itype)
660 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
661 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
662 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
664 split_constant_offset (op0, &var0, off);
665 *var = fold_convert (type, var0);
676 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
677 will be ssizetype. */
680 split_constant_offset (tree exp, tree *var, tree *off)
682 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
686 *off = ssize_int (0);
689 if (tree_is_chrec (exp))
692 otype = TREE_TYPE (exp);
693 code = TREE_CODE (exp);
694 extract_ops_from_tree (exp, &code, &op0, &op1);
695 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
697 *var = fold_convert (type, e);
702 /* Returns the address ADDR of an object in a canonical shape (without nop
703 casts, and with type of pointer to the object). */
706 canonicalize_base_object_address (tree addr)
712 /* The base address may be obtained by casting from integer, in that case
714 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
717 if (TREE_CODE (addr) != ADDR_EXPR)
720 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
723 /* Analyzes the behavior of the memory reference DR in the innermost loop or
724 basic block that contains it. Returns true if analysis succeed or false
728 dr_analyze_innermost (struct data_reference *dr)
730 gimple stmt = DR_STMT (dr);
731 struct loop *loop = loop_containing_stmt (stmt);
732 tree ref = DR_REF (dr);
733 HOST_WIDE_INT pbitsize, pbitpos;
735 enum machine_mode pmode;
736 int punsignedp, pvolatilep;
737 affine_iv base_iv, offset_iv;
738 tree init, dinit, step;
739 bool in_loop = (loop && loop->num);
741 if (dump_file && (dump_flags & TDF_DETAILS))
742 fprintf (dump_file, "analyze_innermost: ");
744 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
745 &pmode, &punsignedp, &pvolatilep, false);
746 gcc_assert (base != NULL_TREE);
748 if (pbitpos % BITS_PER_UNIT != 0)
750 if (dump_file && (dump_flags & TDF_DETAILS))
751 fprintf (dump_file, "failed: bit offset alignment.\n");
755 if (TREE_CODE (base) == MEM_REF)
757 if (!integer_zerop (TREE_OPERAND (base, 1)))
761 double_int moff = mem_ref_offset (base);
762 poffset = double_int_to_tree (sizetype, moff);
765 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
767 base = TREE_OPERAND (base, 0);
770 base = build_fold_addr_expr (base);
773 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
776 if (dump_file && (dump_flags & TDF_DETAILS))
777 fprintf (dump_file, "failed: evolution of base is not affine.\n");
784 base_iv.step = ssize_int (0);
785 base_iv.no_overflow = true;
790 offset_iv.base = ssize_int (0);
791 offset_iv.step = ssize_int (0);
797 offset_iv.base = poffset;
798 offset_iv.step = ssize_int (0);
800 else if (!simple_iv (loop, loop_containing_stmt (stmt),
801 poffset, &offset_iv, false))
803 if (dump_file && (dump_flags & TDF_DETAILS))
804 fprintf (dump_file, "failed: evolution of offset is not"
810 init = ssize_int (pbitpos / BITS_PER_UNIT);
811 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
812 init = size_binop (PLUS_EXPR, init, dinit);
813 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
814 init = size_binop (PLUS_EXPR, init, dinit);
816 step = size_binop (PLUS_EXPR,
817 fold_convert (ssizetype, base_iv.step),
818 fold_convert (ssizetype, offset_iv.step));
820 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
822 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
826 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
828 if (dump_file && (dump_flags & TDF_DETAILS))
829 fprintf (dump_file, "success.\n");
834 /* Determines the base object and the list of indices of memory reference
835 DR, analyzed in LOOP and instantiated in loop nest NEST. */
838 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
840 VEC (tree, heap) *access_fns = NULL;
841 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
842 tree base, off, access_fn = NULL_TREE;
843 basic_block before_loop = NULL;
847 DR_BASE_OBJECT (dr) = ref;
848 DR_ACCESS_FNS (dr) = NULL;
852 before_loop = block_before_loop (nest);
854 /* Analyze access functions of dimensions we know to be independent. */
855 while (handled_component_p (aref))
857 /* For ARRAY_REFs the base is the reference with the index replaced
859 if (TREE_CODE (aref) == ARRAY_REF)
861 op = TREE_OPERAND (aref, 1);
862 access_fn = analyze_scalar_evolution (loop, op);
863 access_fn = instantiate_scev (before_loop, loop, access_fn);
864 VEC_safe_push (tree, heap, access_fns, access_fn);
865 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
867 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
868 into a two element array with a constant index. The base is
869 then just the immediate underlying object. */
870 else if (TREE_CODE (aref) == REALPART_EXPR)
872 ref = TREE_OPERAND (ref, 0);
873 VEC_safe_push (tree, heap, access_fns, integer_zero_node);
875 else if (TREE_CODE (aref) == IMAGPART_EXPR)
877 ref = TREE_OPERAND (ref, 0);
878 VEC_safe_push (tree, heap, access_fns, integer_one_node);
881 aref = TREE_OPERAND (aref, 0);
884 if (TREE_CODE (aref) == MEM_REF)
886 op = TREE_OPERAND (aref, 0);
887 access_fn = analyze_scalar_evolution (loop, op);
888 access_fn = instantiate_scev (before_loop, loop, access_fn);
889 base = initial_condition (access_fn);
890 split_constant_offset (base, &base, &off);
891 if (!integer_zerop (TREE_OPERAND (aref, 1)))
893 off = size_binop (PLUS_EXPR, off,
894 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
895 TREE_OPERAND (aref, 1)
896 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
898 access_fn = chrec_replace_initial_condition (access_fn,
899 fold_convert (TREE_TYPE (base), off));
901 TREE_OPERAND (aref, 0) = base;
902 VEC_safe_push (tree, heap, access_fns, access_fn);
905 if (TREE_CODE (ref) == MEM_REF
906 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
907 && integer_zerop (TREE_OPERAND (ref, 1)))
908 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
910 /* For canonicalization purposes we'd like to strip all outermost
911 zero-offset component-refs.
912 ??? For now simply handle zero-index array-refs. */
913 while (TREE_CODE (ref) == ARRAY_REF
914 && integer_zerop (TREE_OPERAND (ref, 1)))
915 ref = TREE_OPERAND (ref, 0);
917 DR_BASE_OBJECT (dr) = ref;
918 DR_ACCESS_FNS (dr) = access_fns;
921 /* Extracts the alias analysis information from the memory reference DR. */
924 dr_analyze_alias (struct data_reference *dr)
926 tree ref = DR_REF (dr);
927 tree base = get_base_address (ref), addr;
929 if (INDIRECT_REF_P (base)
930 || TREE_CODE (base) == MEM_REF)
932 addr = TREE_OPERAND (base, 0);
933 if (TREE_CODE (addr) == SSA_NAME)
934 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
938 /* Frees data reference DR. */
941 free_data_ref (data_reference_p dr)
943 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
947 /* Analyzes memory reference MEMREF accessed in STMT. The reference
948 is read if IS_READ is true, write otherwise. Returns the
949 data_reference description of MEMREF. NEST is the outermost loop
950 in which the reference should be instantiated, LOOP is the loop in
951 which the data reference should be analyzed. */
953 struct data_reference *
954 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
957 struct data_reference *dr;
959 if (dump_file && (dump_flags & TDF_DETAILS))
961 fprintf (dump_file, "Creating dr for ");
962 print_generic_expr (dump_file, memref, TDF_SLIM);
963 fprintf (dump_file, "\n");
966 dr = XCNEW (struct data_reference);
968 DR_REF (dr) = memref;
969 DR_IS_READ (dr) = is_read;
971 dr_analyze_innermost (dr);
972 dr_analyze_indices (dr, nest, loop);
973 dr_analyze_alias (dr);
975 if (dump_file && (dump_flags & TDF_DETAILS))
978 fprintf (dump_file, "\tbase_address: ");
979 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
980 fprintf (dump_file, "\n\toffset from base address: ");
981 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
982 fprintf (dump_file, "\n\tconstant offset from base address: ");
983 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
984 fprintf (dump_file, "\n\tstep: ");
985 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
986 fprintf (dump_file, "\n\taligned to: ");
987 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
988 fprintf (dump_file, "\n\tbase_object: ");
989 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
990 fprintf (dump_file, "\n");
991 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
993 fprintf (dump_file, "\tAccess function %d: ", i);
994 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1001 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1004 dr_equal_offsets_p1 (tree offset1, tree offset2)
1008 STRIP_NOPS (offset1);
1009 STRIP_NOPS (offset2);
1011 if (offset1 == offset2)
1014 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1015 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1018 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1019 TREE_OPERAND (offset2, 0));
1021 if (!res || !BINARY_CLASS_P (offset1))
1024 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1025 TREE_OPERAND (offset2, 1));
1030 /* Check if DRA and DRB have equal offsets. */
1032 dr_equal_offsets_p (struct data_reference *dra,
1033 struct data_reference *drb)
1035 tree offset1, offset2;
1037 offset1 = DR_OFFSET (dra);
1038 offset2 = DR_OFFSET (drb);
1040 return dr_equal_offsets_p1 (offset1, offset2);
1043 /* Returns true if FNA == FNB. */
1046 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1048 unsigned i, n = VEC_length (tree, fna);
1050 if (n != VEC_length (tree, fnb))
1053 for (i = 0; i < n; i++)
1054 if (!operand_equal_p (VEC_index (tree, fna, i),
1055 VEC_index (tree, fnb, i), 0))
1061 /* If all the functions in CF are the same, returns one of them,
1062 otherwise returns NULL. */
1065 common_affine_function (conflict_function *cf)
1070 if (!CF_NONTRIVIAL_P (cf))
1075 for (i = 1; i < cf->n; i++)
1076 if (!affine_function_equal_p (comm, cf->fns[i]))
1082 /* Returns the base of the affine function FN. */
1085 affine_function_base (affine_fn fn)
1087 return VEC_index (tree, fn, 0);
1090 /* Returns true if FN is a constant. */
1093 affine_function_constant_p (affine_fn fn)
1098 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1099 if (!integer_zerop (coef))
1105 /* Returns true if FN is the zero constant function. */
1108 affine_function_zero_p (affine_fn fn)
1110 return (integer_zerop (affine_function_base (fn))
1111 && affine_function_constant_p (fn));
1114 /* Returns a signed integer type with the largest precision from TA
1118 signed_type_for_types (tree ta, tree tb)
1120 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1121 return signed_type_for (ta);
1123 return signed_type_for (tb);
1126 /* Applies operation OP on affine functions FNA and FNB, and returns the
1130 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1136 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1138 n = VEC_length (tree, fna);
1139 m = VEC_length (tree, fnb);
1143 n = VEC_length (tree, fnb);
1144 m = VEC_length (tree, fna);
1147 ret = VEC_alloc (tree, heap, m);
1148 for (i = 0; i < n; i++)
1150 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1151 TREE_TYPE (VEC_index (tree, fnb, i)));
1153 VEC_quick_push (tree, ret,
1154 fold_build2 (op, type,
1155 VEC_index (tree, fna, i),
1156 VEC_index (tree, fnb, i)));
1159 for (; VEC_iterate (tree, fna, i, coef); i++)
1160 VEC_quick_push (tree, ret,
1161 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1162 coef, integer_zero_node));
1163 for (; VEC_iterate (tree, fnb, i, coef); i++)
1164 VEC_quick_push (tree, ret,
1165 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1166 integer_zero_node, coef));
1171 /* Returns the sum of affine functions FNA and FNB. */
1174 affine_fn_plus (affine_fn fna, affine_fn fnb)
1176 return affine_fn_op (PLUS_EXPR, fna, fnb);
1179 /* Returns the difference of affine functions FNA and FNB. */
1182 affine_fn_minus (affine_fn fna, affine_fn fnb)
1184 return affine_fn_op (MINUS_EXPR, fna, fnb);
1187 /* Frees affine function FN. */
1190 affine_fn_free (affine_fn fn)
1192 VEC_free (tree, heap, fn);
1195 /* Determine for each subscript in the data dependence relation DDR
1199 compute_subscript_distance (struct data_dependence_relation *ddr)
1201 conflict_function *cf_a, *cf_b;
1202 affine_fn fn_a, fn_b, diff;
1204 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1208 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1210 struct subscript *subscript;
1212 subscript = DDR_SUBSCRIPT (ddr, i);
1213 cf_a = SUB_CONFLICTS_IN_A (subscript);
1214 cf_b = SUB_CONFLICTS_IN_B (subscript);
1216 fn_a = common_affine_function (cf_a);
1217 fn_b = common_affine_function (cf_b);
1220 SUB_DISTANCE (subscript) = chrec_dont_know;
1223 diff = affine_fn_minus (fn_a, fn_b);
1225 if (affine_function_constant_p (diff))
1226 SUB_DISTANCE (subscript) = affine_function_base (diff);
1228 SUB_DISTANCE (subscript) = chrec_dont_know;
1230 affine_fn_free (diff);
1235 /* Returns the conflict function for "unknown". */
1237 static conflict_function *
1238 conflict_fn_not_known (void)
1240 conflict_function *fn = XCNEW (conflict_function);
1246 /* Returns the conflict function for "independent". */
1248 static conflict_function *
1249 conflict_fn_no_dependence (void)
1251 conflict_function *fn = XCNEW (conflict_function);
1252 fn->n = NO_DEPENDENCE;
1257 /* Returns true if the address of OBJ is invariant in LOOP. */
1260 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1262 while (handled_component_p (obj))
1264 if (TREE_CODE (obj) == ARRAY_REF)
1266 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1267 need to check the stride and the lower bound of the reference. */
1268 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1270 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1274 else if (TREE_CODE (obj) == COMPONENT_REF)
1276 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1280 obj = TREE_OPERAND (obj, 0);
1283 if (!INDIRECT_REF_P (obj)
1284 && TREE_CODE (obj) != MEM_REF)
1287 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1291 /* Returns false if we can prove that data references A and B do not alias,
1292 true otherwise. If LOOP_NEST is false no cross-iteration aliases are
1296 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1299 tree addr_a = DR_BASE_OBJECT (a);
1300 tree addr_b = DR_BASE_OBJECT (b);
1302 /* If we are not processing a loop nest but scalar code we
1303 do not need to care about possible cross-iteration dependences
1304 and thus can process the full original reference. Do so,
1305 similar to how loop invariant motion applies extra offset-based
1309 aff_tree off1, off2;
1310 double_int size1, size2;
1311 get_inner_reference_aff (DR_REF (a), &off1, &size1);
1312 get_inner_reference_aff (DR_REF (b), &off2, &size2);
1313 aff_combination_scale (&off1, double_int_minus_one);
1314 aff_combination_add (&off2, &off1);
1315 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1319 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1320 return refs_output_dependent_p (addr_a, addr_b);
1321 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1322 return refs_anti_dependent_p (addr_a, addr_b);
1323 return refs_may_alias_p (addr_a, addr_b);
1326 static void compute_self_dependence (struct data_dependence_relation *);
1328 /* Initialize a data dependence relation between data accesses A and
1329 B. NB_LOOPS is the number of loops surrounding the references: the
1330 size of the classic distance/direction vectors. */
1332 static struct data_dependence_relation *
1333 initialize_data_dependence_relation (struct data_reference *a,
1334 struct data_reference *b,
1335 VEC (loop_p, heap) *loop_nest)
1337 struct data_dependence_relation *res;
1340 res = XNEW (struct data_dependence_relation);
1343 DDR_LOOP_NEST (res) = NULL;
1344 DDR_REVERSED_P (res) = false;
1345 DDR_SUBSCRIPTS (res) = NULL;
1346 DDR_DIR_VECTS (res) = NULL;
1347 DDR_DIST_VECTS (res) = NULL;
1349 if (a == NULL || b == NULL)
1351 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1355 /* If the data references do not alias, then they are independent. */
1356 if (!dr_may_alias_p (a, b, loop_nest != NULL))
1358 DDR_ARE_DEPENDENT (res) = chrec_known;
1362 /* When the references are exactly the same, don't spend time doing
1363 the data dependence tests, just initialize the ddr and return. */
1364 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1366 DDR_AFFINE_P (res) = true;
1367 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1368 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1369 DDR_LOOP_NEST (res) = loop_nest;
1370 DDR_INNER_LOOP (res) = 0;
1371 DDR_SELF_REFERENCE (res) = true;
1372 compute_self_dependence (res);
1376 /* If the references do not access the same object, we do not know
1377 whether they alias or not. */
1378 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1380 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1384 /* If the base of the object is not invariant in the loop nest, we cannot
1385 analyze it. TODO -- in fact, it would suffice to record that there may
1386 be arbitrary dependences in the loops where the base object varies. */
1388 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1389 DR_BASE_OBJECT (a)))
1391 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1395 /* If the number of dimensions of the access to not agree we can have
1396 a pointer access to a component of the array element type and an
1397 array access while the base-objects are still the same. Punt. */
1398 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1400 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1404 DDR_AFFINE_P (res) = true;
1405 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1406 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1407 DDR_LOOP_NEST (res) = loop_nest;
1408 DDR_INNER_LOOP (res) = 0;
1409 DDR_SELF_REFERENCE (res) = false;
1411 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1413 struct subscript *subscript;
1415 subscript = XNEW (struct subscript);
1416 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1417 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1418 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1419 SUB_DISTANCE (subscript) = chrec_dont_know;
1420 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1426 /* Frees memory used by the conflict function F. */
1429 free_conflict_function (conflict_function *f)
1433 if (CF_NONTRIVIAL_P (f))
1435 for (i = 0; i < f->n; i++)
1436 affine_fn_free (f->fns[i]);
1441 /* Frees memory used by SUBSCRIPTS. */
1444 free_subscripts (VEC (subscript_p, heap) *subscripts)
1449 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1451 free_conflict_function (s->conflicting_iterations_in_a);
1452 free_conflict_function (s->conflicting_iterations_in_b);
1455 VEC_free (subscript_p, heap, subscripts);
1458 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1462 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1465 if (dump_file && (dump_flags & TDF_DETAILS))
1467 fprintf (dump_file, "(dependence classified: ");
1468 print_generic_expr (dump_file, chrec, 0);
1469 fprintf (dump_file, ")\n");
1472 DDR_ARE_DEPENDENT (ddr) = chrec;
1473 free_subscripts (DDR_SUBSCRIPTS (ddr));
1474 DDR_SUBSCRIPTS (ddr) = NULL;
1477 /* The dependence relation DDR cannot be represented by a distance
1481 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1483 if (dump_file && (dump_flags & TDF_DETAILS))
1484 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1486 DDR_AFFINE_P (ddr) = false;
1491 /* This section contains the classic Banerjee tests. */
1493 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1494 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1497 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1499 return (evolution_function_is_constant_p (chrec_a)
1500 && evolution_function_is_constant_p (chrec_b));
1503 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1504 variable, i.e., if the SIV (Single Index Variable) test is true. */
1507 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1509 if ((evolution_function_is_constant_p (chrec_a)
1510 && evolution_function_is_univariate_p (chrec_b))
1511 || (evolution_function_is_constant_p (chrec_b)
1512 && evolution_function_is_univariate_p (chrec_a)))
1515 if (evolution_function_is_univariate_p (chrec_a)
1516 && evolution_function_is_univariate_p (chrec_b))
1518 switch (TREE_CODE (chrec_a))
1520 case POLYNOMIAL_CHREC:
1521 switch (TREE_CODE (chrec_b))
1523 case POLYNOMIAL_CHREC:
1524 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1539 /* Creates a conflict function with N dimensions. The affine functions
1540 in each dimension follow. */
1542 static conflict_function *
1543 conflict_fn (unsigned n, ...)
1546 conflict_function *ret = XCNEW (conflict_function);
1549 gcc_assert (0 < n && n <= MAX_DIM);
1553 for (i = 0; i < n; i++)
1554 ret->fns[i] = va_arg (ap, affine_fn);
1560 /* Returns constant affine function with value CST. */
1563 affine_fn_cst (tree cst)
1565 affine_fn fn = VEC_alloc (tree, heap, 1);
1566 VEC_quick_push (tree, fn, cst);
1570 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1573 affine_fn_univar (tree cst, unsigned dim, tree coef)
1575 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1578 gcc_assert (dim > 0);
1579 VEC_quick_push (tree, fn, cst);
1580 for (i = 1; i < dim; i++)
1581 VEC_quick_push (tree, fn, integer_zero_node);
1582 VEC_quick_push (tree, fn, coef);
1586 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1587 *OVERLAPS_B are initialized to the functions that describe the
1588 relation between the elements accessed twice by CHREC_A and
1589 CHREC_B. For k >= 0, the following property is verified:
1591 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1594 analyze_ziv_subscript (tree chrec_a,
1596 conflict_function **overlaps_a,
1597 conflict_function **overlaps_b,
1598 tree *last_conflicts)
1600 tree type, difference;
1601 dependence_stats.num_ziv++;
1603 if (dump_file && (dump_flags & TDF_DETAILS))
1604 fprintf (dump_file, "(analyze_ziv_subscript \n");
1606 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1607 chrec_a = chrec_convert (type, chrec_a, NULL);
1608 chrec_b = chrec_convert (type, chrec_b, NULL);
1609 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1611 switch (TREE_CODE (difference))
1614 if (integer_zerop (difference))
1616 /* The difference is equal to zero: the accessed index
1617 overlaps for each iteration in the loop. */
1618 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1619 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1620 *last_conflicts = chrec_dont_know;
1621 dependence_stats.num_ziv_dependent++;
1625 /* The accesses do not overlap. */
1626 *overlaps_a = conflict_fn_no_dependence ();
1627 *overlaps_b = conflict_fn_no_dependence ();
1628 *last_conflicts = integer_zero_node;
1629 dependence_stats.num_ziv_independent++;
1634 /* We're not sure whether the indexes overlap. For the moment,
1635 conservatively answer "don't know". */
1636 if (dump_file && (dump_flags & TDF_DETAILS))
1637 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1639 *overlaps_a = conflict_fn_not_known ();
1640 *overlaps_b = conflict_fn_not_known ();
1641 *last_conflicts = chrec_dont_know;
1642 dependence_stats.num_ziv_unimplemented++;
1646 if (dump_file && (dump_flags & TDF_DETAILS))
1647 fprintf (dump_file, ")\n");
1650 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1651 and only if it fits to the int type. If this is not the case, or the
1652 bound on the number of iterations of LOOP could not be derived, returns
1656 max_stmt_executions_tree (struct loop *loop)
1660 if (!max_stmt_executions (loop, true, &nit))
1661 return chrec_dont_know;
1663 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1664 return chrec_dont_know;
1666 return double_int_to_tree (unsigned_type_node, nit);
1669 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1670 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1671 *OVERLAPS_B are initialized to the functions that describe the
1672 relation between the elements accessed twice by CHREC_A and
1673 CHREC_B. For k >= 0, the following property is verified:
1675 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1678 analyze_siv_subscript_cst_affine (tree chrec_a,
1680 conflict_function **overlaps_a,
1681 conflict_function **overlaps_b,
1682 tree *last_conflicts)
1684 bool value0, value1, value2;
1685 tree type, difference, tmp;
1687 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1688 chrec_a = chrec_convert (type, chrec_a, NULL);
1689 chrec_b = chrec_convert (type, chrec_b, NULL);
1690 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1692 if (!chrec_is_positive (initial_condition (difference), &value0))
1694 if (dump_file && (dump_flags & TDF_DETAILS))
1695 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1697 dependence_stats.num_siv_unimplemented++;
1698 *overlaps_a = conflict_fn_not_known ();
1699 *overlaps_b = conflict_fn_not_known ();
1700 *last_conflicts = chrec_dont_know;
1705 if (value0 == false)
1707 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1709 if (dump_file && (dump_flags & TDF_DETAILS))
1710 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1712 *overlaps_a = conflict_fn_not_known ();
1713 *overlaps_b = conflict_fn_not_known ();
1714 *last_conflicts = chrec_dont_know;
1715 dependence_stats.num_siv_unimplemented++;
1724 chrec_b = {10, +, 1}
1727 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1729 HOST_WIDE_INT numiter;
1730 struct loop *loop = get_chrec_loop (chrec_b);
1732 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1733 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1734 fold_build1 (ABS_EXPR, type, difference),
1735 CHREC_RIGHT (chrec_b));
1736 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1737 *last_conflicts = integer_one_node;
1740 /* Perform weak-zero siv test to see if overlap is
1741 outside the loop bounds. */
1742 numiter = max_stmt_executions_int (loop, true);
1745 && compare_tree_int (tmp, numiter) > 0)
1747 free_conflict_function (*overlaps_a);
1748 free_conflict_function (*overlaps_b);
1749 *overlaps_a = conflict_fn_no_dependence ();
1750 *overlaps_b = conflict_fn_no_dependence ();
1751 *last_conflicts = integer_zero_node;
1752 dependence_stats.num_siv_independent++;
1755 dependence_stats.num_siv_dependent++;
1759 /* When the step does not divide the difference, there are
1763 *overlaps_a = conflict_fn_no_dependence ();
1764 *overlaps_b = conflict_fn_no_dependence ();
1765 *last_conflicts = integer_zero_node;
1766 dependence_stats.num_siv_independent++;
1775 chrec_b = {10, +, -1}
1777 In this case, chrec_a will not overlap with chrec_b. */
1778 *overlaps_a = conflict_fn_no_dependence ();
1779 *overlaps_b = conflict_fn_no_dependence ();
1780 *last_conflicts = integer_zero_node;
1781 dependence_stats.num_siv_independent++;
1788 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1790 if (dump_file && (dump_flags & TDF_DETAILS))
1791 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1793 *overlaps_a = conflict_fn_not_known ();
1794 *overlaps_b = conflict_fn_not_known ();
1795 *last_conflicts = chrec_dont_know;
1796 dependence_stats.num_siv_unimplemented++;
1801 if (value2 == false)
1805 chrec_b = {10, +, -1}
1807 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1809 HOST_WIDE_INT numiter;
1810 struct loop *loop = get_chrec_loop (chrec_b);
1812 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1813 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1814 CHREC_RIGHT (chrec_b));
1815 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1816 *last_conflicts = integer_one_node;
1818 /* Perform weak-zero siv test to see if overlap is
1819 outside the loop bounds. */
1820 numiter = max_stmt_executions_int (loop, true);
1823 && compare_tree_int (tmp, numiter) > 0)
1825 free_conflict_function (*overlaps_a);
1826 free_conflict_function (*overlaps_b);
1827 *overlaps_a = conflict_fn_no_dependence ();
1828 *overlaps_b = conflict_fn_no_dependence ();
1829 *last_conflicts = integer_zero_node;
1830 dependence_stats.num_siv_independent++;
1833 dependence_stats.num_siv_dependent++;
1837 /* When the step does not divide the difference, there
1841 *overlaps_a = conflict_fn_no_dependence ();
1842 *overlaps_b = conflict_fn_no_dependence ();
1843 *last_conflicts = integer_zero_node;
1844 dependence_stats.num_siv_independent++;
1854 In this case, chrec_a will not overlap with chrec_b. */
1855 *overlaps_a = conflict_fn_no_dependence ();
1856 *overlaps_b = conflict_fn_no_dependence ();
1857 *last_conflicts = integer_zero_node;
1858 dependence_stats.num_siv_independent++;
1866 /* Helper recursive function for initializing the matrix A. Returns
1867 the initial value of CHREC. */
1870 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1874 switch (TREE_CODE (chrec))
1876 case POLYNOMIAL_CHREC:
1877 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1879 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1880 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1886 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1887 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1889 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1894 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1895 return chrec_convert (chrec_type (chrec), op, NULL);
1900 /* Handle ~X as -1 - X. */
1901 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1902 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1903 build_int_cst (TREE_TYPE (chrec), -1), op);
1915 #define FLOOR_DIV(x,y) ((x) / (y))
1917 /* Solves the special case of the Diophantine equation:
1918 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1920 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1921 number of iterations that loops X and Y run. The overlaps will be
1922 constructed as evolutions in dimension DIM. */
1925 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1926 affine_fn *overlaps_a,
1927 affine_fn *overlaps_b,
1928 tree *last_conflicts, int dim)
1930 if (((step_a > 0 && step_b > 0)
1931 || (step_a < 0 && step_b < 0)))
1933 int step_overlaps_a, step_overlaps_b;
1934 int gcd_steps_a_b, last_conflict, tau2;
1936 gcd_steps_a_b = gcd (step_a, step_b);
1937 step_overlaps_a = step_b / gcd_steps_a_b;
1938 step_overlaps_b = step_a / gcd_steps_a_b;
1942 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1943 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1944 last_conflict = tau2;
1945 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1948 *last_conflicts = chrec_dont_know;
1950 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1951 build_int_cst (NULL_TREE,
1953 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1954 build_int_cst (NULL_TREE,
1960 *overlaps_a = affine_fn_cst (integer_zero_node);
1961 *overlaps_b = affine_fn_cst (integer_zero_node);
1962 *last_conflicts = integer_zero_node;
1966 /* Solves the special case of a Diophantine equation where CHREC_A is
1967 an affine bivariate function, and CHREC_B is an affine univariate
1968 function. For example,
1970 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1972 has the following overlapping functions:
1974 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1975 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1976 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1978 FORNOW: This is a specialized implementation for a case occurring in
1979 a common benchmark. Implement the general algorithm. */
1982 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1983 conflict_function **overlaps_a,
1984 conflict_function **overlaps_b,
1985 tree *last_conflicts)
1987 bool xz_p, yz_p, xyz_p;
1988 int step_x, step_y, step_z;
1989 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1990 affine_fn overlaps_a_xz, overlaps_b_xz;
1991 affine_fn overlaps_a_yz, overlaps_b_yz;
1992 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1993 affine_fn ova1, ova2, ovb;
1994 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1996 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1997 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1998 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2001 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
2002 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2003 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2005 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2007 if (dump_file && (dump_flags & TDF_DETAILS))
2008 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2010 *overlaps_a = conflict_fn_not_known ();
2011 *overlaps_b = conflict_fn_not_known ();
2012 *last_conflicts = chrec_dont_know;
2016 niter = MIN (niter_x, niter_z);
2017 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2020 &last_conflicts_xz, 1);
2021 niter = MIN (niter_y, niter_z);
2022 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2025 &last_conflicts_yz, 2);
2026 niter = MIN (niter_x, niter_z);
2027 niter = MIN (niter_y, niter);
2028 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2031 &last_conflicts_xyz, 3);
2033 xz_p = !integer_zerop (last_conflicts_xz);
2034 yz_p = !integer_zerop (last_conflicts_yz);
2035 xyz_p = !integer_zerop (last_conflicts_xyz);
2037 if (xz_p || yz_p || xyz_p)
2039 ova1 = affine_fn_cst (integer_zero_node);
2040 ova2 = affine_fn_cst (integer_zero_node);
2041 ovb = affine_fn_cst (integer_zero_node);
2044 affine_fn t0 = ova1;
2047 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2048 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2049 affine_fn_free (t0);
2050 affine_fn_free (t2);
2051 *last_conflicts = last_conflicts_xz;
2055 affine_fn t0 = ova2;
2058 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2059 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2060 affine_fn_free (t0);
2061 affine_fn_free (t2);
2062 *last_conflicts = last_conflicts_yz;
2066 affine_fn t0 = ova1;
2067 affine_fn t2 = ova2;
2070 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2071 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2072 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2073 affine_fn_free (t0);
2074 affine_fn_free (t2);
2075 affine_fn_free (t4);
2076 *last_conflicts = last_conflicts_xyz;
2078 *overlaps_a = conflict_fn (2, ova1, ova2);
2079 *overlaps_b = conflict_fn (1, ovb);
2083 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2084 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2085 *last_conflicts = integer_zero_node;
2088 affine_fn_free (overlaps_a_xz);
2089 affine_fn_free (overlaps_b_xz);
2090 affine_fn_free (overlaps_a_yz);
2091 affine_fn_free (overlaps_b_yz);
2092 affine_fn_free (overlaps_a_xyz);
2093 affine_fn_free (overlaps_b_xyz);
2096 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2099 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2102 memcpy (vec2, vec1, size * sizeof (*vec1));
2105 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2108 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2113 for (i = 0; i < m; i++)
2114 lambda_vector_copy (mat1[i], mat2[i], n);
2117 /* Store the N x N identity matrix in MAT. */
2120 lambda_matrix_id (lambda_matrix mat, int size)
2124 for (i = 0; i < size; i++)
2125 for (j = 0; j < size; j++)
2126 mat[i][j] = (i == j) ? 1 : 0;
2129 /* Return the first nonzero element of vector VEC1 between START and N.
2130 We must have START <= N. Returns N if VEC1 is the zero vector. */
2133 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2136 while (j < n && vec1[j] == 0)
2141 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2142 R2 = R2 + CONST1 * R1. */
2145 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2152 for (i = 0; i < n; i++)
2153 mat[r2][i] += const1 * mat[r1][i];
2156 /* Swap rows R1 and R2 in matrix MAT. */
2159 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2168 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2169 and store the result in VEC2. */
2172 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2173 int size, int const1)
2178 lambda_vector_clear (vec2, size);
2180 for (i = 0; i < size; i++)
2181 vec2[i] = const1 * vec1[i];
2184 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2187 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2190 lambda_vector_mult_const (vec1, vec2, size, -1);
2193 /* Negate row R1 of matrix MAT which has N columns. */
2196 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2198 lambda_vector_negate (mat[r1], mat[r1], n);
2201 /* Return true if two vectors are equal. */
2204 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2207 for (i = 0; i < size; i++)
2208 if (vec1[i] != vec2[i])
2213 /* Given an M x N integer matrix A, this function determines an M x
2214 M unimodular matrix U, and an M x N echelon matrix S such that
2215 "U.A = S". This decomposition is also known as "right Hermite".
2217 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2218 Restructuring Compilers" Utpal Banerjee. */
2221 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2222 lambda_matrix S, lambda_matrix U)
2226 lambda_matrix_copy (A, S, m, n);
2227 lambda_matrix_id (U, m);
2229 for (j = 0; j < n; j++)
2231 if (lambda_vector_first_nz (S[j], m, i0) < m)
2234 for (i = m - 1; i >= i0; i--)
2236 while (S[i][j] != 0)
2238 int sigma, factor, a, b;
2242 sigma = (a * b < 0) ? -1: 1;
2245 factor = sigma * (a / b);
2247 lambda_matrix_row_add (S, n, i, i-1, -factor);
2248 lambda_matrix_row_exchange (S, i, i-1);
2250 lambda_matrix_row_add (U, m, i, i-1, -factor);
2251 lambda_matrix_row_exchange (U, i, i-1);
2258 /* Determines the overlapping elements due to accesses CHREC_A and
2259 CHREC_B, that are affine functions. This function cannot handle
2260 symbolic evolution functions, ie. when initial conditions are
2261 parameters, because it uses lambda matrices of integers. */
2264 analyze_subscript_affine_affine (tree chrec_a,
2266 conflict_function **overlaps_a,
2267 conflict_function **overlaps_b,
2268 tree *last_conflicts)
2270 unsigned nb_vars_a, nb_vars_b, dim;
2271 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2272 lambda_matrix A, U, S;
2273 struct obstack scratch_obstack;
2275 if (eq_evolutions_p (chrec_a, chrec_b))
2277 /* The accessed index overlaps for each iteration in the
2279 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2280 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2281 *last_conflicts = chrec_dont_know;
2284 if (dump_file && (dump_flags & TDF_DETAILS))
2285 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2287 /* For determining the initial intersection, we have to solve a
2288 Diophantine equation. This is the most time consuming part.
2290 For answering to the question: "Is there a dependence?" we have
2291 to prove that there exists a solution to the Diophantine
2292 equation, and that the solution is in the iteration domain,
2293 i.e. the solution is positive or zero, and that the solution
2294 happens before the upper bound loop.nb_iterations. Otherwise
2295 there is no dependence. This function outputs a description of
2296 the iterations that hold the intersections. */
2298 nb_vars_a = nb_vars_in_chrec (chrec_a);
2299 nb_vars_b = nb_vars_in_chrec (chrec_b);
2301 gcc_obstack_init (&scratch_obstack);
2303 dim = nb_vars_a + nb_vars_b;
2304 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2305 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2306 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2308 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2309 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2310 gamma = init_b - init_a;
2312 /* Don't do all the hard work of solving the Diophantine equation
2313 when we already know the solution: for example,
2316 | gamma = 3 - 3 = 0.
2317 Then the first overlap occurs during the first iterations:
2318 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2322 if (nb_vars_a == 1 && nb_vars_b == 1)
2324 HOST_WIDE_INT step_a, step_b;
2325 HOST_WIDE_INT niter, niter_a, niter_b;
2328 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2329 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2330 niter = MIN (niter_a, niter_b);
2331 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2332 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2334 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2337 *overlaps_a = conflict_fn (1, ova);
2338 *overlaps_b = conflict_fn (1, ovb);
2341 else if (nb_vars_a == 2 && nb_vars_b == 1)
2342 compute_overlap_steps_for_affine_1_2
2343 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2345 else if (nb_vars_a == 1 && nb_vars_b == 2)
2346 compute_overlap_steps_for_affine_1_2
2347 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2351 if (dump_file && (dump_flags & TDF_DETAILS))
2352 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2353 *overlaps_a = conflict_fn_not_known ();
2354 *overlaps_b = conflict_fn_not_known ();
2355 *last_conflicts = chrec_dont_know;
2357 goto end_analyze_subs_aa;
2361 lambda_matrix_right_hermite (A, dim, 1, S, U);
2366 lambda_matrix_row_negate (U, dim, 0);
2368 gcd_alpha_beta = S[0][0];
2370 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2371 but that is a quite strange case. Instead of ICEing, answer
2373 if (gcd_alpha_beta == 0)
2375 *overlaps_a = conflict_fn_not_known ();
2376 *overlaps_b = conflict_fn_not_known ();
2377 *last_conflicts = chrec_dont_know;
2378 goto end_analyze_subs_aa;
2381 /* The classic "gcd-test". */
2382 if (!int_divides_p (gcd_alpha_beta, gamma))
2384 /* The "gcd-test" has determined that there is no integer
2385 solution, i.e. there is no dependence. */
2386 *overlaps_a = conflict_fn_no_dependence ();
2387 *overlaps_b = conflict_fn_no_dependence ();
2388 *last_conflicts = integer_zero_node;
2391 /* Both access functions are univariate. This includes SIV and MIV cases. */
2392 else if (nb_vars_a == 1 && nb_vars_b == 1)
2394 /* Both functions should have the same evolution sign. */
2395 if (((A[0][0] > 0 && -A[1][0] > 0)
2396 || (A[0][0] < 0 && -A[1][0] < 0)))
2398 /* The solutions are given by:
2400 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2403 For a given integer t. Using the following variables,
2405 | i0 = u11 * gamma / gcd_alpha_beta
2406 | j0 = u12 * gamma / gcd_alpha_beta
2413 | y0 = j0 + j1 * t. */
2414 HOST_WIDE_INT i0, j0, i1, j1;
2416 i0 = U[0][0] * gamma / gcd_alpha_beta;
2417 j0 = U[0][1] * gamma / gcd_alpha_beta;
2421 if ((i1 == 0 && i0 < 0)
2422 || (j1 == 0 && j0 < 0))
2424 /* There is no solution.
2425 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2426 falls in here, but for the moment we don't look at the
2427 upper bound of the iteration domain. */
2428 *overlaps_a = conflict_fn_no_dependence ();
2429 *overlaps_b = conflict_fn_no_dependence ();
2430 *last_conflicts = integer_zero_node;
2431 goto end_analyze_subs_aa;
2434 if (i1 > 0 && j1 > 0)
2436 HOST_WIDE_INT niter_a = max_stmt_executions_int
2437 (get_chrec_loop (chrec_a), true);
2438 HOST_WIDE_INT niter_b = max_stmt_executions_int
2439 (get_chrec_loop (chrec_b), true);
2440 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2442 /* (X0, Y0) is a solution of the Diophantine equation:
2443 "chrec_a (X0) = chrec_b (Y0)". */
2444 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2446 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2447 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2449 /* (X1, Y1) is the smallest positive solution of the eq
2450 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2451 first conflict occurs. */
2452 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2453 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2454 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2458 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2459 FLOOR_DIV (niter - j0, j1));
2460 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2462 /* If the overlap occurs outside of the bounds of the
2463 loop, there is no dependence. */
2464 if (x1 >= niter || y1 >= niter)
2466 *overlaps_a = conflict_fn_no_dependence ();
2467 *overlaps_b = conflict_fn_no_dependence ();
2468 *last_conflicts = integer_zero_node;
2469 goto end_analyze_subs_aa;
2472 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2475 *last_conflicts = chrec_dont_know;
2479 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2481 build_int_cst (NULL_TREE, i1)));
2484 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2486 build_int_cst (NULL_TREE, j1)));
2490 /* FIXME: For the moment, the upper bound of the
2491 iteration domain for i and j is not checked. */
2492 if (dump_file && (dump_flags & TDF_DETAILS))
2493 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2494 *overlaps_a = conflict_fn_not_known ();
2495 *overlaps_b = conflict_fn_not_known ();
2496 *last_conflicts = chrec_dont_know;
2501 if (dump_file && (dump_flags & TDF_DETAILS))
2502 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2503 *overlaps_a = conflict_fn_not_known ();
2504 *overlaps_b = conflict_fn_not_known ();
2505 *last_conflicts = chrec_dont_know;
2510 if (dump_file && (dump_flags & TDF_DETAILS))
2511 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2512 *overlaps_a = conflict_fn_not_known ();
2513 *overlaps_b = conflict_fn_not_known ();
2514 *last_conflicts = chrec_dont_know;
2517 end_analyze_subs_aa:
2518 obstack_free (&scratch_obstack, NULL);
2519 if (dump_file && (dump_flags & TDF_DETAILS))
2521 fprintf (dump_file, " (overlaps_a = ");
2522 dump_conflict_function (dump_file, *overlaps_a);
2523 fprintf (dump_file, ")\n (overlaps_b = ");
2524 dump_conflict_function (dump_file, *overlaps_b);
2525 fprintf (dump_file, ")\n");
2526 fprintf (dump_file, ")\n");
2530 /* Returns true when analyze_subscript_affine_affine can be used for
2531 determining the dependence relation between chrec_a and chrec_b,
2532 that contain symbols. This function modifies chrec_a and chrec_b
2533 such that the analysis result is the same, and such that they don't
2534 contain symbols, and then can safely be passed to the analyzer.
2536 Example: The analysis of the following tuples of evolutions produce
2537 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2540 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2541 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2545 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2547 tree diff, type, left_a, left_b, right_b;
2549 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2550 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2551 /* FIXME: For the moment not handled. Might be refined later. */
2554 type = chrec_type (*chrec_a);
2555 left_a = CHREC_LEFT (*chrec_a);
2556 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2557 diff = chrec_fold_minus (type, left_a, left_b);
2559 if (!evolution_function_is_constant_p (diff))
2562 if (dump_file && (dump_flags & TDF_DETAILS))
2563 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2565 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2566 diff, CHREC_RIGHT (*chrec_a));
2567 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2568 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2569 build_int_cst (type, 0),
2574 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2575 *OVERLAPS_B are initialized to the functions that describe the
2576 relation between the elements accessed twice by CHREC_A and
2577 CHREC_B. For k >= 0, the following property is verified:
2579 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2582 analyze_siv_subscript (tree chrec_a,
2584 conflict_function **overlaps_a,
2585 conflict_function **overlaps_b,
2586 tree *last_conflicts,
2589 dependence_stats.num_siv++;
2591 if (dump_file && (dump_flags & TDF_DETAILS))
2592 fprintf (dump_file, "(analyze_siv_subscript \n");
2594 if (evolution_function_is_constant_p (chrec_a)
2595 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2596 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2597 overlaps_a, overlaps_b, last_conflicts);
2599 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2600 && evolution_function_is_constant_p (chrec_b))
2601 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2602 overlaps_b, overlaps_a, last_conflicts);
2604 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2605 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2607 if (!chrec_contains_symbols (chrec_a)
2608 && !chrec_contains_symbols (chrec_b))
2610 analyze_subscript_affine_affine (chrec_a, chrec_b,
2611 overlaps_a, overlaps_b,
2614 if (CF_NOT_KNOWN_P (*overlaps_a)
2615 || CF_NOT_KNOWN_P (*overlaps_b))
2616 dependence_stats.num_siv_unimplemented++;
2617 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2618 || CF_NO_DEPENDENCE_P (*overlaps_b))
2619 dependence_stats.num_siv_independent++;
2621 dependence_stats.num_siv_dependent++;
2623 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2626 analyze_subscript_affine_affine (chrec_a, chrec_b,
2627 overlaps_a, overlaps_b,
2630 if (CF_NOT_KNOWN_P (*overlaps_a)
2631 || CF_NOT_KNOWN_P (*overlaps_b))
2632 dependence_stats.num_siv_unimplemented++;
2633 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2634 || CF_NO_DEPENDENCE_P (*overlaps_b))
2635 dependence_stats.num_siv_independent++;
2637 dependence_stats.num_siv_dependent++;
2640 goto siv_subscript_dontknow;
2645 siv_subscript_dontknow:;
2646 if (dump_file && (dump_flags & TDF_DETAILS))
2647 fprintf (dump_file, "siv test failed: unimplemented.\n");
2648 *overlaps_a = conflict_fn_not_known ();
2649 *overlaps_b = conflict_fn_not_known ();
2650 *last_conflicts = chrec_dont_know;
2651 dependence_stats.num_siv_unimplemented++;
2654 if (dump_file && (dump_flags & TDF_DETAILS))
2655 fprintf (dump_file, ")\n");
2658 /* Returns false if we can prove that the greatest common divisor of the steps
2659 of CHREC does not divide CST, false otherwise. */
2662 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2664 HOST_WIDE_INT cd = 0, val;
2667 if (!host_integerp (cst, 0))
2669 val = tree_low_cst (cst, 0);
2671 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2673 step = CHREC_RIGHT (chrec);
2674 if (!host_integerp (step, 0))
2676 cd = gcd (cd, tree_low_cst (step, 0));
2677 chrec = CHREC_LEFT (chrec);
2680 return val % cd == 0;
2683 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2684 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2685 functions that describe the relation between the elements accessed
2686 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2689 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2692 analyze_miv_subscript (tree chrec_a,
2694 conflict_function **overlaps_a,
2695 conflict_function **overlaps_b,
2696 tree *last_conflicts,
2697 struct loop *loop_nest)
2699 tree type, difference;
2701 dependence_stats.num_miv++;
2702 if (dump_file && (dump_flags & TDF_DETAILS))
2703 fprintf (dump_file, "(analyze_miv_subscript \n");
2705 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2706 chrec_a = chrec_convert (type, chrec_a, NULL);
2707 chrec_b = chrec_convert (type, chrec_b, NULL);
2708 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2710 if (eq_evolutions_p (chrec_a, chrec_b))
2712 /* Access functions are the same: all the elements are accessed
2713 in the same order. */
2714 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2715 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2716 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2717 dependence_stats.num_miv_dependent++;
2720 else if (evolution_function_is_constant_p (difference)
2721 /* For the moment, the following is verified:
2722 evolution_function_is_affine_multivariate_p (chrec_a,
2724 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2726 /* testsuite/.../ssa-chrec-33.c
2727 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2729 The difference is 1, and all the evolution steps are multiples
2730 of 2, consequently there are no overlapping elements. */
2731 *overlaps_a = conflict_fn_no_dependence ();
2732 *overlaps_b = conflict_fn_no_dependence ();
2733 *last_conflicts = integer_zero_node;
2734 dependence_stats.num_miv_independent++;
2737 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2738 && !chrec_contains_symbols (chrec_a)
2739 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2740 && !chrec_contains_symbols (chrec_b))
2742 /* testsuite/.../ssa-chrec-35.c
2743 {0, +, 1}_2 vs. {0, +, 1}_3
2744 the overlapping elements are respectively located at iterations:
2745 {0, +, 1}_x and {0, +, 1}_x,
2746 in other words, we have the equality:
2747 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2750 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2751 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2753 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2754 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2756 analyze_subscript_affine_affine (chrec_a, chrec_b,
2757 overlaps_a, overlaps_b, last_conflicts);
2759 if (CF_NOT_KNOWN_P (*overlaps_a)
2760 || CF_NOT_KNOWN_P (*overlaps_b))
2761 dependence_stats.num_miv_unimplemented++;
2762 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2763 || CF_NO_DEPENDENCE_P (*overlaps_b))
2764 dependence_stats.num_miv_independent++;
2766 dependence_stats.num_miv_dependent++;
2771 /* When the analysis is too difficult, answer "don't know". */
2772 if (dump_file && (dump_flags & TDF_DETAILS))
2773 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2775 *overlaps_a = conflict_fn_not_known ();
2776 *overlaps_b = conflict_fn_not_known ();
2777 *last_conflicts = chrec_dont_know;
2778 dependence_stats.num_miv_unimplemented++;
2781 if (dump_file && (dump_flags & TDF_DETAILS))
2782 fprintf (dump_file, ")\n");
2785 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2786 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2787 OVERLAP_ITERATIONS_B are initialized with two functions that
2788 describe the iterations that contain conflicting elements.
2790 Remark: For an integer k >= 0, the following equality is true:
2792 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2796 analyze_overlapping_iterations (tree chrec_a,
2798 conflict_function **overlap_iterations_a,
2799 conflict_function **overlap_iterations_b,
2800 tree *last_conflicts, struct loop *loop_nest)
2802 unsigned int lnn = loop_nest->num;
2804 dependence_stats.num_subscript_tests++;
2806 if (dump_file && (dump_flags & TDF_DETAILS))
2808 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2809 fprintf (dump_file, " (chrec_a = ");
2810 print_generic_expr (dump_file, chrec_a, 0);
2811 fprintf (dump_file, ")\n (chrec_b = ");
2812 print_generic_expr (dump_file, chrec_b, 0);
2813 fprintf (dump_file, ")\n");
2816 if (chrec_a == NULL_TREE
2817 || chrec_b == NULL_TREE
2818 || chrec_contains_undetermined (chrec_a)
2819 || chrec_contains_undetermined (chrec_b))
2821 dependence_stats.num_subscript_undetermined++;
2823 *overlap_iterations_a = conflict_fn_not_known ();
2824 *overlap_iterations_b = conflict_fn_not_known ();
2827 /* If they are the same chrec, and are affine, they overlap
2828 on every iteration. */
2829 else if (eq_evolutions_p (chrec_a, chrec_b)
2830 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2831 || operand_equal_p (chrec_a, chrec_b, 0)))
2833 dependence_stats.num_same_subscript_function++;
2834 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2835 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2836 *last_conflicts = chrec_dont_know;
2839 /* If they aren't the same, and aren't affine, we can't do anything
2841 else if ((chrec_contains_symbols (chrec_a)
2842 || chrec_contains_symbols (chrec_b))
2843 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2844 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2846 dependence_stats.num_subscript_undetermined++;
2847 *overlap_iterations_a = conflict_fn_not_known ();
2848 *overlap_iterations_b = conflict_fn_not_known ();
2851 else if (ziv_subscript_p (chrec_a, chrec_b))
2852 analyze_ziv_subscript (chrec_a, chrec_b,
2853 overlap_iterations_a, overlap_iterations_b,
2856 else if (siv_subscript_p (chrec_a, chrec_b))
2857 analyze_siv_subscript (chrec_a, chrec_b,
2858 overlap_iterations_a, overlap_iterations_b,
2859 last_conflicts, lnn);
2862 analyze_miv_subscript (chrec_a, chrec_b,
2863 overlap_iterations_a, overlap_iterations_b,
2864 last_conflicts, loop_nest);
2866 if (dump_file && (dump_flags & TDF_DETAILS))
2868 fprintf (dump_file, " (overlap_iterations_a = ");
2869 dump_conflict_function (dump_file, *overlap_iterations_a);
2870 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2871 dump_conflict_function (dump_file, *overlap_iterations_b);
2872 fprintf (dump_file, ")\n");
2873 fprintf (dump_file, ")\n");
2877 /* Helper function for uniquely inserting distance vectors. */
2880 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2885 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2886 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2889 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2892 /* Helper function for uniquely inserting direction vectors. */
2895 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2900 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2901 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2904 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2907 /* Add a distance of 1 on all the loops outer than INDEX. If we
2908 haven't yet determined a distance for this outer loop, push a new
2909 distance vector composed of the previous distance, and a distance
2910 of 1 for this outer loop. Example:
2918 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2919 save (0, 1), then we have to save (1, 0). */
2922 add_outer_distances (struct data_dependence_relation *ddr,
2923 lambda_vector dist_v, int index)
2925 /* For each outer loop where init_v is not set, the accesses are
2926 in dependence of distance 1 in the loop. */
2927 while (--index >= 0)
2929 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2930 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2932 save_dist_v (ddr, save_v);
2936 /* Return false when fail to represent the data dependence as a
2937 distance vector. INIT_B is set to true when a component has been
2938 added to the distance vector DIST_V. INDEX_CARRY is then set to
2939 the index in DIST_V that carries the dependence. */
2942 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2943 struct data_reference *ddr_a,
2944 struct data_reference *ddr_b,
2945 lambda_vector dist_v, bool *init_b,
2949 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2951 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2953 tree access_fn_a, access_fn_b;
2954 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2956 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2958 non_affine_dependence_relation (ddr);
2962 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2963 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2965 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2966 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2969 int var_a = CHREC_VARIABLE (access_fn_a);
2970 int var_b = CHREC_VARIABLE (access_fn_b);
2973 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2975 non_affine_dependence_relation (ddr);
2979 dist = int_cst_value (SUB_DISTANCE (subscript));
2980 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
2981 *index_carry = MIN (index, *index_carry);
2983 /* This is the subscript coupling test. If we have already
2984 recorded a distance for this loop (a distance coming from
2985 another subscript), it should be the same. For example,
2986 in the following code, there is no dependence:
2993 if (init_v[index] != 0 && dist_v[index] != dist)
2995 finalize_ddr_dependent (ddr, chrec_known);
2999 dist_v[index] = dist;
3003 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3005 /* This can be for example an affine vs. constant dependence
3006 (T[i] vs. T[3]) that is not an affine dependence and is
3007 not representable as a distance vector. */
3008 non_affine_dependence_relation (ddr);
3016 /* Return true when the DDR contains only constant access functions. */
3019 constant_access_functions (const struct data_dependence_relation *ddr)
3023 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3024 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3025 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3031 /* Helper function for the case where DDR_A and DDR_B are the same
3032 multivariate access function with a constant step. For an example
3036 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3039 tree c_1 = CHREC_LEFT (c_2);
3040 tree c_0 = CHREC_LEFT (c_1);
3041 lambda_vector dist_v;
3044 /* Polynomials with more than 2 variables are not handled yet. When
3045 the evolution steps are parameters, it is not possible to
3046 represent the dependence using classical distance vectors. */
3047 if (TREE_CODE (c_0) != INTEGER_CST
3048 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3049 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3051 DDR_AFFINE_P (ddr) = false;
3055 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3056 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3058 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3059 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3060 v1 = int_cst_value (CHREC_RIGHT (c_1));
3061 v2 = int_cst_value (CHREC_RIGHT (c_2));
3074 save_dist_v (ddr, dist_v);
3076 add_outer_distances (ddr, dist_v, x_1);
3079 /* Helper function for the case where DDR_A and DDR_B are the same
3080 access functions. */
3083 add_other_self_distances (struct data_dependence_relation *ddr)
3085 lambda_vector dist_v;
3087 int index_carry = DDR_NB_LOOPS (ddr);
3089 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3091 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3093 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3095 if (!evolution_function_is_univariate_p (access_fun))
3097 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3099 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3103 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3105 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3106 add_multivariate_self_dist (ddr, access_fun);
3108 /* The evolution step is not constant: it varies in
3109 the outer loop, so this cannot be represented by a
3110 distance vector. For example in pr34635.c the
3111 evolution is {0, +, {0, +, 4}_1}_2. */
3112 DDR_AFFINE_P (ddr) = false;
3117 index_carry = MIN (index_carry,
3118 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3119 DDR_LOOP_NEST (ddr)));
3123 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3124 add_outer_distances (ddr, dist_v, index_carry);
3128 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3130 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3132 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3133 save_dist_v (ddr, dist_v);
3136 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3137 is the case for example when access functions are the same and
3138 equal to a constant, as in:
3145 in which case the distance vectors are (0) and (1). */
3148 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3152 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3154 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3155 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3156 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3158 for (j = 0; j < ca->n; j++)
3159 if (affine_function_zero_p (ca->fns[j]))
3161 insert_innermost_unit_dist_vector (ddr);
3165 for (j = 0; j < cb->n; j++)
3166 if (affine_function_zero_p (cb->fns[j]))
3168 insert_innermost_unit_dist_vector (ddr);
3174 /* Compute the classic per loop distance vector. DDR is the data
3175 dependence relation to build a vector from. Return false when fail
3176 to represent the data dependence as a distance vector. */
3179 build_classic_dist_vector (struct data_dependence_relation *ddr,
3180 struct loop *loop_nest)
3182 bool init_b = false;
3183 int index_carry = DDR_NB_LOOPS (ddr);
3184 lambda_vector dist_v;
3186 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3189 if (same_access_functions (ddr))
3191 /* Save the 0 vector. */
3192 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3193 save_dist_v (ddr, dist_v);
3195 if (constant_access_functions (ddr))
3196 add_distance_for_zero_overlaps (ddr);
3198 if (DDR_NB_LOOPS (ddr) > 1)
3199 add_other_self_distances (ddr);
3204 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3205 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3206 dist_v, &init_b, &index_carry))
3209 /* Save the distance vector if we initialized one. */
3212 /* Verify a basic constraint: classic distance vectors should
3213 always be lexicographically positive.
3215 Data references are collected in the order of execution of
3216 the program, thus for the following loop
3218 | for (i = 1; i < 100; i++)
3219 | for (j = 1; j < 100; j++)
3221 | t = T[j+1][i-1]; // A
3222 | T[j][i] = t + 2; // B
3225 references are collected following the direction of the wind:
3226 A then B. The data dependence tests are performed also
3227 following this order, such that we're looking at the distance
3228 separating the elements accessed by A from the elements later
3229 accessed by B. But in this example, the distance returned by
3230 test_dep (A, B) is lexicographically negative (-1, 1), that
3231 means that the access A occurs later than B with respect to
3232 the outer loop, ie. we're actually looking upwind. In this
3233 case we solve test_dep (B, A) looking downwind to the
3234 lexicographically positive solution, that returns the
3235 distance vector (1, -1). */
3236 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3238 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3239 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3242 compute_subscript_distance (ddr);
3243 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3244 save_v, &init_b, &index_carry))
3246 save_dist_v (ddr, save_v);
3247 DDR_REVERSED_P (ddr) = true;
3249 /* In this case there is a dependence forward for all the
3252 | for (k = 1; k < 100; k++)
3253 | for (i = 1; i < 100; i++)
3254 | for (j = 1; j < 100; j++)
3256 | t = T[j+1][i-1]; // A
3257 | T[j][i] = t + 2; // B
3265 if (DDR_NB_LOOPS (ddr) > 1)
3267 add_outer_distances (ddr, save_v, index_carry);
3268 add_outer_distances (ddr, dist_v, index_carry);
3273 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3274 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3276 if (DDR_NB_LOOPS (ddr) > 1)
3278 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3280 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3281 DDR_A (ddr), loop_nest))
3283 compute_subscript_distance (ddr);
3284 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3285 opposite_v, &init_b,
3289 save_dist_v (ddr, save_v);
3290 add_outer_distances (ddr, dist_v, index_carry);
3291 add_outer_distances (ddr, opposite_v, index_carry);
3294 save_dist_v (ddr, save_v);
3299 /* There is a distance of 1 on all the outer loops: Example:
3300 there is a dependence of distance 1 on loop_1 for the array A.
3306 add_outer_distances (ddr, dist_v,
3307 lambda_vector_first_nz (dist_v,
3308 DDR_NB_LOOPS (ddr), 0));
3311 if (dump_file && (dump_flags & TDF_DETAILS))
3315 fprintf (dump_file, "(build_classic_dist_vector\n");
3316 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3318 fprintf (dump_file, " dist_vector = (");
3319 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3320 DDR_NB_LOOPS (ddr));
3321 fprintf (dump_file, " )\n");
3323 fprintf (dump_file, ")\n");
3329 /* Return the direction for a given distance.
3330 FIXME: Computing dir this way is suboptimal, since dir can catch
3331 cases that dist is unable to represent. */
3333 static inline enum data_dependence_direction
3334 dir_from_dist (int dist)
3337 return dir_positive;
3339 return dir_negative;
3344 /* Compute the classic per loop direction vector. DDR is the data
3345 dependence relation to build a vector from. */
3348 build_classic_dir_vector (struct data_dependence_relation *ddr)
3351 lambda_vector dist_v;
3353 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3355 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3357 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3358 dir_v[j] = dir_from_dist (dist_v[j]);
3360 save_dir_v (ddr, dir_v);
3364 /* Helper function. Returns true when there is a dependence between
3365 data references DRA and DRB. */
3368 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3369 struct data_reference *dra,
3370 struct data_reference *drb,
3371 struct loop *loop_nest)
3374 tree last_conflicts;
3375 struct subscript *subscript;
3377 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3380 conflict_function *overlaps_a, *overlaps_b;
3382 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3383 DR_ACCESS_FN (drb, i),
3384 &overlaps_a, &overlaps_b,
3385 &last_conflicts, loop_nest);
3387 if (CF_NOT_KNOWN_P (overlaps_a)
3388 || CF_NOT_KNOWN_P (overlaps_b))
3390 finalize_ddr_dependent (ddr, chrec_dont_know);
3391 dependence_stats.num_dependence_undetermined++;
3392 free_conflict_function (overlaps_a);
3393 free_conflict_function (overlaps_b);
3397 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3398 || CF_NO_DEPENDENCE_P (overlaps_b))
3400 finalize_ddr_dependent (ddr, chrec_known);
3401 dependence_stats.num_dependence_independent++;
3402 free_conflict_function (overlaps_a);
3403 free_conflict_function (overlaps_b);
3409 if (SUB_CONFLICTS_IN_A (subscript))
3410 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3411 if (SUB_CONFLICTS_IN_B (subscript))
3412 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3414 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3415 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3416 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3423 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3426 subscript_dependence_tester (struct data_dependence_relation *ddr,
3427 struct loop *loop_nest)
3430 if (dump_file && (dump_flags & TDF_DETAILS))
3431 fprintf (dump_file, "(subscript_dependence_tester \n");
3433 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3434 dependence_stats.num_dependence_dependent++;
3436 compute_subscript_distance (ddr);
3437 if (build_classic_dist_vector (ddr, loop_nest))
3438 build_classic_dir_vector (ddr);
3440 if (dump_file && (dump_flags & TDF_DETAILS))
3441 fprintf (dump_file, ")\n");
3444 /* Returns true when all the access functions of A are affine or
3445 constant with respect to LOOP_NEST. */
3448 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3449 const struct loop *loop_nest)
3452 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3455 FOR_EACH_VEC_ELT (tree, fns, i, t)
3456 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3457 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3463 /* Initializes an equation for an OMEGA problem using the information
3464 contained in the ACCESS_FUN. Returns true when the operation
3467 PB is the omega constraint system.
3468 EQ is the number of the equation to be initialized.
3469 OFFSET is used for shifting the variables names in the constraints:
3470 a constrain is composed of 2 * the number of variables surrounding
3471 dependence accesses. OFFSET is set either to 0 for the first n variables,
3472 then it is set to n.
3473 ACCESS_FUN is expected to be an affine chrec. */
3476 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3477 unsigned int offset, tree access_fun,
3478 struct data_dependence_relation *ddr)
3480 switch (TREE_CODE (access_fun))
3482 case POLYNOMIAL_CHREC:
3484 tree left = CHREC_LEFT (access_fun);
3485 tree right = CHREC_RIGHT (access_fun);
3486 int var = CHREC_VARIABLE (access_fun);
3489 if (TREE_CODE (right) != INTEGER_CST)
3492 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3493 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3495 /* Compute the innermost loop index. */
3496 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3499 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3500 += int_cst_value (right);
3502 switch (TREE_CODE (left))
3504 case POLYNOMIAL_CHREC:
3505 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3508 pb->eqs[eq].coef[0] += int_cst_value (left);
3517 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3525 /* As explained in the comments preceding init_omega_for_ddr, we have
3526 to set up a system for each loop level, setting outer loops
3527 variation to zero, and current loop variation to positive or zero.
3528 Save each lexico positive distance vector. */
3531 omega_extract_distance_vectors (omega_pb pb,
3532 struct data_dependence_relation *ddr)
3536 struct loop *loopi, *loopj;
3537 enum omega_result res;
3539 /* Set a new problem for each loop in the nest. The basis is the
3540 problem that we have initialized until now. On top of this we
3541 add new constraints. */
3542 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3543 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3546 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3547 DDR_NB_LOOPS (ddr));
3549 omega_copy_problem (copy, pb);
3551 /* For all the outer loops "loop_j", add "dj = 0". */
3553 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3555 eq = omega_add_zero_eq (copy, omega_black);
3556 copy->eqs[eq].coef[j + 1] = 1;
3559 /* For "loop_i", add "0 <= di". */
3560 geq = omega_add_zero_geq (copy, omega_black);
3561 copy->geqs[geq].coef[i + 1] = 1;
3563 /* Reduce the constraint system, and test that the current
3564 problem is feasible. */
3565 res = omega_simplify_problem (copy);
3566 if (res == omega_false
3567 || res == omega_unknown
3568 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3571 for (eq = 0; eq < copy->num_subs; eq++)
3572 if (copy->subs[eq].key == (int) i + 1)
3574 dist = copy->subs[eq].coef[0];
3580 /* Reinitialize problem... */
3581 omega_copy_problem (copy, pb);
3583 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3585 eq = omega_add_zero_eq (copy, omega_black);
3586 copy->eqs[eq].coef[j + 1] = 1;
3589 /* ..., but this time "di = 1". */
3590 eq = omega_add_zero_eq (copy, omega_black);
3591 copy->eqs[eq].coef[i + 1] = 1;
3592 copy->eqs[eq].coef[0] = -1;
3594 res = omega_simplify_problem (copy);
3595 if (res == omega_false
3596 || res == omega_unknown
3597 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3600 for (eq = 0; eq < copy->num_subs; eq++)
3601 if (copy->subs[eq].key == (int) i + 1)
3603 dist = copy->subs[eq].coef[0];
3609 /* Save the lexicographically positive distance vector. */
3612 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3613 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3617 for (eq = 0; eq < copy->num_subs; eq++)
3618 if (copy->subs[eq].key > 0)
3620 dist = copy->subs[eq].coef[0];
3621 dist_v[copy->subs[eq].key - 1] = dist;
3624 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3625 dir_v[j] = dir_from_dist (dist_v[j]);
3627 save_dist_v (ddr, dist_v);
3628 save_dir_v (ddr, dir_v);
3632 omega_free_problem (copy);
3636 /* This is called for each subscript of a tuple of data references:
3637 insert an equality for representing the conflicts. */
3640 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3641 struct data_dependence_relation *ddr,
3642 omega_pb pb, bool *maybe_dependent)
3645 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3646 TREE_TYPE (access_fun_b));
3647 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3648 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3649 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3652 /* When the fun_a - fun_b is not constant, the dependence is not
3653 captured by the classic distance vector representation. */
3654 if (TREE_CODE (difference) != INTEGER_CST)
3658 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3660 /* There is no dependence. */
3661 *maybe_dependent = false;
3665 minus_one = build_int_cst (type, -1);
3666 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3668 eq = omega_add_zero_eq (pb, omega_black);
3669 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3670 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3671 /* There is probably a dependence, but the system of
3672 constraints cannot be built: answer "don't know". */
3676 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3677 && !int_divides_p (lambda_vector_gcd
3678 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3679 2 * DDR_NB_LOOPS (ddr)),
3680 pb->eqs[eq].coef[0]))
3682 /* There is no dependence. */
3683 *maybe_dependent = false;
3690 /* Helper function, same as init_omega_for_ddr but specialized for
3691 data references A and B. */
3694 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3695 struct data_dependence_relation *ddr,
3696 omega_pb pb, bool *maybe_dependent)
3701 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3703 /* Insert an equality per subscript. */
3704 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3706 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3707 ddr, pb, maybe_dependent))
3709 else if (*maybe_dependent == false)
3711 /* There is no dependence. */
3712 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3717 /* Insert inequalities: constraints corresponding to the iteration
3718 domain, i.e. the loops surrounding the references "loop_x" and
3719 the distance variables "dx". The layout of the OMEGA
3720 representation is as follows:
3721 - coef[0] is the constant
3722 - coef[1..nb_loops] are the protected variables that will not be
3723 removed by the solver: the "dx"
3724 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3726 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3727 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3729 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3732 ineq = omega_add_zero_geq (pb, omega_black);
3733 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3735 /* 0 <= loop_x + dx */
3736 ineq = omega_add_zero_geq (pb, omega_black);
3737 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3738 pb->geqs[ineq].coef[i + 1] = 1;
3742 /* loop_x <= nb_iters */
3743 ineq = omega_add_zero_geq (pb, omega_black);
3744 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3745 pb->geqs[ineq].coef[0] = nbi;
3747 /* loop_x + dx <= nb_iters */
3748 ineq = omega_add_zero_geq (pb, omega_black);
3749 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3750 pb->geqs[ineq].coef[i + 1] = -1;
3751 pb->geqs[ineq].coef[0] = nbi;
3753 /* A step "dx" bigger than nb_iters is not feasible, so
3754 add "0 <= nb_iters + dx", */
3755 ineq = omega_add_zero_geq (pb, omega_black);
3756 pb->geqs[ineq].coef[i + 1] = 1;
3757 pb->geqs[ineq].coef[0] = nbi;
3758 /* and "dx <= nb_iters". */
3759 ineq = omega_add_zero_geq (pb, omega_black);
3760 pb->geqs[ineq].coef[i + 1] = -1;
3761 pb->geqs[ineq].coef[0] = nbi;
3765 omega_extract_distance_vectors (pb, ddr);
3770 /* Sets up the Omega dependence problem for the data dependence
3771 relation DDR. Returns false when the constraint system cannot be
3772 built, ie. when the test answers "don't know". Returns true
3773 otherwise, and when independence has been proved (using one of the
3774 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3775 set MAYBE_DEPENDENT to true.
3777 Example: for setting up the dependence system corresponding to the
3778 conflicting accesses
3783 | ... A[2*j, 2*(i + j)]
3787 the following constraints come from the iteration domain:
3794 where di, dj are the distance variables. The constraints
3795 representing the conflicting elements are:
3798 i + 1 = 2 * (i + di + j + dj)
3800 For asking that the resulting distance vector (di, dj) be
3801 lexicographically positive, we insert the constraint "di >= 0". If
3802 "di = 0" in the solution, we fix that component to zero, and we
3803 look at the inner loops: we set a new problem where all the outer
3804 loop distances are zero, and fix this inner component to be
3805 positive. When one of the components is positive, we save that
3806 distance, and set a new problem where the distance on this loop is
3807 zero, searching for other distances in the inner loops. Here is
3808 the classic example that illustrates that we have to set for each
3809 inner loop a new problem:
3817 we have to save two distances (1, 0) and (0, 1).
3819 Given two array references, refA and refB, we have to set the
3820 dependence problem twice, refA vs. refB and refB vs. refA, and we
3821 cannot do a single test, as refB might occur before refA in the
3822 inner loops, and the contrary when considering outer loops: ex.
3827 | T[{1,+,1}_2][{1,+,1}_1] // refA
3828 | T[{2,+,1}_2][{0,+,1}_1] // refB
3833 refB touches the elements in T before refA, and thus for the same
3834 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3835 but for successive loop_0 iterations, we have (1, -1, 1)
3837 The Omega solver expects the distance variables ("di" in the
3838 previous example) to come first in the constraint system (as
3839 variables to be protected, or "safe" variables), the constraint
3840 system is built using the following layout:
3842 "cst | distance vars | index vars".
3846 init_omega_for_ddr (struct data_dependence_relation *ddr,
3847 bool *maybe_dependent)
3852 *maybe_dependent = true;
3854 if (same_access_functions (ddr))
3857 lambda_vector dir_v;
3859 /* Save the 0 vector. */
3860 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3861 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3862 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3863 dir_v[j] = dir_equal;
3864 save_dir_v (ddr, dir_v);
3866 /* Save the dependences carried by outer loops. */
3867 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3868 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3870 omega_free_problem (pb);
3874 /* Omega expects the protected variables (those that have to be kept
3875 after elimination) to appear first in the constraint system.
3876 These variables are the distance variables. In the following
3877 initialization we declare NB_LOOPS safe variables, and the total
3878 number of variables for the constraint system is 2*NB_LOOPS. */
3879 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3880 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3882 omega_free_problem (pb);
3884 /* Stop computation if not decidable, or no dependence. */
3885 if (res == false || *maybe_dependent == false)
3888 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3889 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3891 omega_free_problem (pb);
3896 /* Return true when DDR contains the same information as that stored
3897 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3900 ddr_consistent_p (FILE *file,
3901 struct data_dependence_relation *ddr,
3902 VEC (lambda_vector, heap) *dist_vects,
3903 VEC (lambda_vector, heap) *dir_vects)
3907 /* If dump_file is set, output there. */
3908 if (dump_file && (dump_flags & TDF_DETAILS))
3911 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3913 lambda_vector b_dist_v;
3914 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3915 VEC_length (lambda_vector, dist_vects),
3916 DDR_NUM_DIST_VECTS (ddr));
3918 fprintf (file, "Banerjee dist vectors:\n");
3919 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3920 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3922 fprintf (file, "Omega dist vectors:\n");
3923 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3924 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3926 fprintf (file, "data dependence relation:\n");
3927 dump_data_dependence_relation (file, ddr);
3929 fprintf (file, ")\n");
3933 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3935 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3936 VEC_length (lambda_vector, dir_vects),
3937 DDR_NUM_DIR_VECTS (ddr));
3941 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3943 lambda_vector a_dist_v;
3944 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3946 /* Distance vectors are not ordered in the same way in the DDR
3947 and in the DIST_VECTS: search for a matching vector. */
3948 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3949 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3952 if (j == VEC_length (lambda_vector, dist_vects))
3954 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3955 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3956 fprintf (file, "not found in Omega dist vectors:\n");
3957 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3958 fprintf (file, "data dependence relation:\n");
3959 dump_data_dependence_relation (file, ddr);
3960 fprintf (file, ")\n");
3964 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3966 lambda_vector a_dir_v;
3967 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3969 /* Direction vectors are not ordered in the same way in the DDR
3970 and in the DIR_VECTS: search for a matching vector. */
3971 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3972 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3975 if (j == VEC_length (lambda_vector, dist_vects))
3977 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3978 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3979 fprintf (file, "not found in Omega dir vectors:\n");
3980 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3981 fprintf (file, "data dependence relation:\n");
3982 dump_data_dependence_relation (file, ddr);
3983 fprintf (file, ")\n");
3990 /* This computes the affine dependence relation between A and B with
3991 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3992 independence between two accesses, while CHREC_DONT_KNOW is used
3993 for representing the unknown relation.
3995 Note that it is possible to stop the computation of the dependence
3996 relation the first time we detect a CHREC_KNOWN element for a given
4000 compute_affine_dependence (struct data_dependence_relation *ddr,
4001 struct loop *loop_nest)
4003 struct data_reference *dra = DDR_A (ddr);
4004 struct data_reference *drb = DDR_B (ddr);
4006 if (dump_file && (dump_flags & TDF_DETAILS))
4008 fprintf (dump_file, "(compute_affine_dependence\n");
4009 fprintf (dump_file, " (stmt_a = \n");
4010 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
4011 fprintf (dump_file, ")\n (stmt_b = \n");
4012 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
4013 fprintf (dump_file, ")\n");
4016 /* Analyze only when the dependence relation is not yet known. */
4017 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
4018 && !DDR_SELF_REFERENCE (ddr))
4020 dependence_stats.num_dependence_tests++;
4022 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4023 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4025 if (flag_check_data_deps)
4027 /* Compute the dependences using the first algorithm. */
4028 subscript_dependence_tester (ddr, loop_nest);
4030 if (dump_file && (dump_flags & TDF_DETAILS))
4032 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4033 dump_data_dependence_relation (dump_file, ddr);
4036 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4038 bool maybe_dependent;
4039 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4041 /* Save the result of the first DD analyzer. */
4042 dist_vects = DDR_DIST_VECTS (ddr);
4043 dir_vects = DDR_DIR_VECTS (ddr);
4045 /* Reset the information. */
4046 DDR_DIST_VECTS (ddr) = NULL;
4047 DDR_DIR_VECTS (ddr) = NULL;
4049 /* Compute the same information using Omega. */
4050 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4051 goto csys_dont_know;
4053 if (dump_file && (dump_flags & TDF_DETAILS))
4055 fprintf (dump_file, "Omega Analyzer\n");
4056 dump_data_dependence_relation (dump_file, ddr);
4059 /* Check that we get the same information. */
4060 if (maybe_dependent)
4061 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4066 subscript_dependence_tester (ddr, loop_nest);
4069 /* As a last case, if the dependence cannot be determined, or if
4070 the dependence is considered too difficult to determine, answer
4075 dependence_stats.num_dependence_undetermined++;
4077 if (dump_file && (dump_flags & TDF_DETAILS))
4079 fprintf (dump_file, "Data ref a:\n");
4080 dump_data_reference (dump_file, dra);
4081 fprintf (dump_file, "Data ref b:\n");
4082 dump_data_reference (dump_file, drb);
4083 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4085 finalize_ddr_dependent (ddr, chrec_dont_know);
4089 if (dump_file && (dump_flags & TDF_DETAILS))
4090 fprintf (dump_file, ")\n");
4093 /* This computes the dependence relation for the same data
4094 reference into DDR. */
4097 compute_self_dependence (struct data_dependence_relation *ddr)
4100 struct subscript *subscript;
4102 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4105 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4108 if (SUB_CONFLICTS_IN_A (subscript))
4109 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4110 if (SUB_CONFLICTS_IN_B (subscript))
4111 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4113 /* The accessed index overlaps for each iteration. */
4114 SUB_CONFLICTS_IN_A (subscript)
4115 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4116 SUB_CONFLICTS_IN_B (subscript)
4117 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4118 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4121 /* The distance vector is the zero vector. */
4122 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4123 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4126 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4127 the data references in DATAREFS, in the LOOP_NEST. When
4128 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4132 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4133 VEC (ddr_p, heap) **dependence_relations,
4134 VEC (loop_p, heap) *loop_nest,
4135 bool compute_self_and_rr)
4137 struct data_dependence_relation *ddr;
4138 struct data_reference *a, *b;
4141 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4142 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4143 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4145 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4146 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4148 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4151 if (compute_self_and_rr)
4152 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4154 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4155 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4156 compute_self_dependence (ddr);
4160 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4161 true if STMT clobbers memory, false otherwise. */
4164 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4166 bool clobbers_memory = false;
4169 enum gimple_code stmt_code = gimple_code (stmt);
4173 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4174 Calls have side-effects, except those to const or pure
4176 if ((stmt_code == GIMPLE_CALL
4177 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4178 || (stmt_code == GIMPLE_ASM
4179 && gimple_asm_volatile_p (stmt)))
4180 clobbers_memory = true;
4182 if (!gimple_vuse (stmt))
4183 return clobbers_memory;
4185 if (stmt_code == GIMPLE_ASSIGN)
4188 op0 = gimple_assign_lhs_ptr (stmt);
4189 op1 = gimple_assign_rhs1_ptr (stmt);
4192 || (REFERENCE_CLASS_P (*op1)
4193 && (base = get_base_address (*op1))
4194 && TREE_CODE (base) != SSA_NAME))
4196 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4198 ref->is_read = true;
4201 else if (stmt_code == GIMPLE_CALL)
4205 op0 = gimple_call_lhs_ptr (stmt);
4206 n = gimple_call_num_args (stmt);
4207 for (i = 0; i < n; i++)
4209 op1 = gimple_call_arg_ptr (stmt, i);
4212 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4214 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4216 ref->is_read = true;
4221 return clobbers_memory;
4225 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4227 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4229 ref->is_read = false;
4231 return clobbers_memory;
4234 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4235 reference, returns false, otherwise returns true. NEST is the outermost
4236 loop of the loop nest in which the references should be analyzed. */
4239 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4240 VEC (data_reference_p, heap) **datarefs)
4243 VEC (data_ref_loc, heap) *references;
4246 data_reference_p dr;
4248 if (get_references_in_stmt (stmt, &references))
4250 VEC_free (data_ref_loc, heap, references);
4254 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4256 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4257 *ref->pos, stmt, ref->is_read);
4258 gcc_assert (dr != NULL);
4259 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4261 VEC_free (data_ref_loc, heap, references);
4265 /* Stores the data references in STMT to DATAREFS. If there is an
4266 unanalyzable reference, returns false, otherwise returns true.
4267 NEST is the outermost loop of the loop nest in which the references
4268 should be instantiated, LOOP is the loop in which the references
4269 should be analyzed. */
4272 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4273 VEC (data_reference_p, heap) **datarefs)
4276 VEC (data_ref_loc, heap) *references;
4279 data_reference_p dr;
4281 if (get_references_in_stmt (stmt, &references))
4283 VEC_free (data_ref_loc, heap, references);
4287 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4289 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4290 gcc_assert (dr != NULL);
4291 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4294 VEC_free (data_ref_loc, heap, references);
4298 /* Search the data references in LOOP, and record the information into
4299 DATAREFS. Returns chrec_dont_know when failing to analyze a
4300 difficult case, returns NULL_TREE otherwise. */
4303 find_data_references_in_bb (struct loop *loop, basic_block bb,
4304 VEC (data_reference_p, heap) **datarefs)
4306 gimple_stmt_iterator bsi;
4308 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4310 gimple stmt = gsi_stmt (bsi);
4312 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4314 struct data_reference *res;
4315 res = XCNEW (struct data_reference);
4316 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4318 return chrec_dont_know;
4325 /* Search the data references in LOOP, and record the information into
4326 DATAREFS. Returns chrec_dont_know when failing to analyze a
4327 difficult case, returns NULL_TREE otherwise.
4329 TODO: This function should be made smarter so that it can handle address
4330 arithmetic as if they were array accesses, etc. */
4333 find_data_references_in_loop (struct loop *loop,
4334 VEC (data_reference_p, heap) **datarefs)
4336 basic_block bb, *bbs;
4339 bbs = get_loop_body_in_dom_order (loop);
4341 for (i = 0; i < loop->num_nodes; i++)
4345 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4348 return chrec_dont_know;
4356 /* Recursive helper function. */
4359 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4361 /* Inner loops of the nest should not contain siblings. Example:
4362 when there are two consecutive loops,
4373 the dependence relation cannot be captured by the distance
4378 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4380 return find_loop_nest_1 (loop->inner, loop_nest);
4384 /* Return false when the LOOP is not well nested. Otherwise return
4385 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4386 contain the loops from the outermost to the innermost, as they will
4387 appear in the classic distance vector. */
4390 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4392 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4394 return find_loop_nest_1 (loop->inner, loop_nest);
4398 /* Returns true when the data dependences have been computed, false otherwise.
4399 Given a loop nest LOOP, the following vectors are returned:
4400 DATAREFS is initialized to all the array elements contained in this loop,
4401 DEPENDENCE_RELATIONS contains the relations between the data references.
4402 Compute read-read and self relations if
4403 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4406 compute_data_dependences_for_loop (struct loop *loop,
4407 bool compute_self_and_read_read_dependences,
4408 VEC (loop_p, heap) **loop_nest,
4409 VEC (data_reference_p, heap) **datarefs,
4410 VEC (ddr_p, heap) **dependence_relations)
4414 memset (&dependence_stats, 0, sizeof (dependence_stats));
4416 /* If the loop nest is not well formed, or one of the data references
4417 is not computable, give up without spending time to compute other
4420 || !find_loop_nest (loop, loop_nest)
4421 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4423 struct data_dependence_relation *ddr;
4425 /* Insert a single relation into dependence_relations:
4427 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4428 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4432 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4433 compute_self_and_read_read_dependences);
4435 if (dump_file && (dump_flags & TDF_STATS))
4437 fprintf (dump_file, "Dependence tester statistics:\n");
4439 fprintf (dump_file, "Number of dependence tests: %d\n",
4440 dependence_stats.num_dependence_tests);
4441 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4442 dependence_stats.num_dependence_dependent);
4443 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4444 dependence_stats.num_dependence_independent);
4445 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4446 dependence_stats.num_dependence_undetermined);
4448 fprintf (dump_file, "Number of subscript tests: %d\n",
4449 dependence_stats.num_subscript_tests);
4450 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4451 dependence_stats.num_subscript_undetermined);
4452 fprintf (dump_file, "Number of same subscript function: %d\n",
4453 dependence_stats.num_same_subscript_function);
4455 fprintf (dump_file, "Number of ziv tests: %d\n",
4456 dependence_stats.num_ziv);
4457 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4458 dependence_stats.num_ziv_dependent);
4459 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4460 dependence_stats.num_ziv_independent);
4461 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4462 dependence_stats.num_ziv_unimplemented);
4464 fprintf (dump_file, "Number of siv tests: %d\n",
4465 dependence_stats.num_siv);
4466 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4467 dependence_stats.num_siv_dependent);
4468 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4469 dependence_stats.num_siv_independent);
4470 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4471 dependence_stats.num_siv_unimplemented);
4473 fprintf (dump_file, "Number of miv tests: %d\n",
4474 dependence_stats.num_miv);
4475 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4476 dependence_stats.num_miv_dependent);
4477 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4478 dependence_stats.num_miv_independent);
4479 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4480 dependence_stats.num_miv_unimplemented);
4486 /* Returns true when the data dependences for the basic block BB have been
4487 computed, false otherwise.
4488 DATAREFS is initialized to all the array elements contained in this basic
4489 block, DEPENDENCE_RELATIONS contains the relations between the data
4490 references. Compute read-read and self relations if
4491 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4493 compute_data_dependences_for_bb (basic_block bb,
4494 bool compute_self_and_read_read_dependences,
4495 VEC (data_reference_p, heap) **datarefs,
4496 VEC (ddr_p, heap) **dependence_relations)
4498 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4501 compute_all_dependences (*datarefs, dependence_relations, NULL,
4502 compute_self_and_read_read_dependences);
4506 /* Entry point (for testing only). Analyze all the data references
4507 and the dependence relations in LOOP.
4509 The data references are computed first.
4511 A relation on these nodes is represented by a complete graph. Some
4512 of the relations could be of no interest, thus the relations can be
4515 In the following function we compute all the relations. This is
4516 just a first implementation that is here for:
4517 - for showing how to ask for the dependence relations,
4518 - for the debugging the whole dependence graph,
4519 - for the dejagnu testcases and maintenance.
4521 It is possible to ask only for a part of the graph, avoiding to
4522 compute the whole dependence graph. The computed dependences are
4523 stored in a knowledge base (KB) such that later queries don't
4524 recompute the same information. The implementation of this KB is
4525 transparent to the optimizer, and thus the KB can be changed with a
4526 more efficient implementation, or the KB could be disabled. */
4528 analyze_all_data_dependences (struct loop *loop)
4531 int nb_data_refs = 10;
4532 VEC (data_reference_p, heap) *datarefs =
4533 VEC_alloc (data_reference_p, heap, nb_data_refs);
4534 VEC (ddr_p, heap) *dependence_relations =
4535 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4536 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4538 /* Compute DDs on the whole function. */
4539 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4540 &dependence_relations);
4544 dump_data_dependence_relations (dump_file, dependence_relations);
4545 fprintf (dump_file, "\n\n");
4547 if (dump_flags & TDF_DETAILS)
4548 dump_dist_dir_vectors (dump_file, dependence_relations);
4550 if (dump_flags & TDF_STATS)
4552 unsigned nb_top_relations = 0;
4553 unsigned nb_bot_relations = 0;
4554 unsigned nb_chrec_relations = 0;
4555 struct data_dependence_relation *ddr;
4557 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4559 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4562 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4566 nb_chrec_relations++;
4569 gather_stats_on_scev_database ();
4573 VEC_free (loop_p, heap, loop_nest);
4574 free_dependence_relations (dependence_relations);
4575 free_data_refs (datarefs);
4578 /* Computes all the data dependences and check that the results of
4579 several analyzers are the same. */
4582 tree_check_data_deps (void)
4585 struct loop *loop_nest;
4587 FOR_EACH_LOOP (li, loop_nest, 0)
4588 analyze_all_data_dependences (loop_nest);
4591 /* Free the memory used by a data dependence relation DDR. */
4594 free_dependence_relation (struct data_dependence_relation *ddr)
4599 if (DDR_SUBSCRIPTS (ddr))
4600 free_subscripts (DDR_SUBSCRIPTS (ddr));
4601 if (DDR_DIST_VECTS (ddr))
4602 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4603 if (DDR_DIR_VECTS (ddr))
4604 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4609 /* Free the memory used by the data dependence relations from
4610 DEPENDENCE_RELATIONS. */
4613 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4616 struct data_dependence_relation *ddr;
4618 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4620 free_dependence_relation (ddr);
4622 VEC_free (ddr_p, heap, dependence_relations);
4625 /* Free the memory used by the data references from DATAREFS. */
4628 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4631 struct data_reference *dr;
4633 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4635 VEC_free (data_reference_p, heap, datarefs);
4640 /* Dump vertex I in RDG to FILE. */
4643 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4645 struct vertex *v = &(rdg->vertices[i]);
4646 struct graph_edge *e;
4648 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4649 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4650 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4653 for (e = v->pred; e; e = e->pred_next)
4654 fprintf (file, " %d", e->src);
4656 fprintf (file, ") (out:");
4659 for (e = v->succ; e; e = e->succ_next)
4660 fprintf (file, " %d", e->dest);
4662 fprintf (file, ")\n");
4663 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4664 fprintf (file, ")\n");
4667 /* Call dump_rdg_vertex on stderr. */
4670 debug_rdg_vertex (struct graph *rdg, int i)
4672 dump_rdg_vertex (stderr, rdg, i);
4675 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4676 dumped vertices to that bitmap. */
4678 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4682 fprintf (file, "(%d\n", c);
4684 for (i = 0; i < rdg->n_vertices; i++)
4685 if (rdg->vertices[i].component == c)
4688 bitmap_set_bit (dumped, i);
4690 dump_rdg_vertex (file, rdg, i);
4693 fprintf (file, ")\n");
4696 /* Call dump_rdg_vertex on stderr. */
4699 debug_rdg_component (struct graph *rdg, int c)
4701 dump_rdg_component (stderr, rdg, c, NULL);
4704 /* Dump the reduced dependence graph RDG to FILE. */
4707 dump_rdg (FILE *file, struct graph *rdg)
4710 bitmap dumped = BITMAP_ALLOC (NULL);
4712 fprintf (file, "(rdg\n");
4714 for (i = 0; i < rdg->n_vertices; i++)
4715 if (!bitmap_bit_p (dumped, i))
4716 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4718 fprintf (file, ")\n");
4719 BITMAP_FREE (dumped);
4722 /* Call dump_rdg on stderr. */
4725 debug_rdg (struct graph *rdg)
4727 dump_rdg (stderr, rdg);
4731 dot_rdg_1 (FILE *file, struct graph *rdg)
4735 fprintf (file, "digraph RDG {\n");
4737 for (i = 0; i < rdg->n_vertices; i++)
4739 struct vertex *v = &(rdg->vertices[i]);
4740 struct graph_edge *e;
4742 /* Highlight reads from memory. */
4743 if (RDG_MEM_READS_STMT (rdg, i))
4744 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4746 /* Highlight stores to memory. */
4747 if (RDG_MEM_WRITE_STMT (rdg, i))
4748 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4751 for (e = v->succ; e; e = e->succ_next)
4752 switch (RDGE_TYPE (e))
4755 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4759 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4763 /* These are the most common dependences: don't print these. */
4764 fprintf (file, "%d -> %d \n", i, e->dest);
4768 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4776 fprintf (file, "}\n\n");
4779 /* Display the Reduced Dependence Graph using dotty. */
4780 extern void dot_rdg (struct graph *);
4783 dot_rdg (struct graph *rdg)
4785 /* When debugging, enable the following code. This cannot be used
4786 in production compilers because it calls "system". */
4788 FILE *file = fopen ("/tmp/rdg.dot", "w");
4789 gcc_assert (file != NULL);
4791 dot_rdg_1 (file, rdg);
4794 system ("dotty /tmp/rdg.dot &");
4796 dot_rdg_1 (stderr, rdg);
4800 /* This structure is used for recording the mapping statement index in
4803 struct GTY(()) rdg_vertex_info
4809 /* Returns the index of STMT in RDG. */
4812 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4814 struct rdg_vertex_info rvi, *slot;
4817 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4825 /* Creates an edge in RDG for each distance vector from DDR. The
4826 order that we keep track of in the RDG is the order in which
4827 statements have to be executed. */
4830 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4832 struct graph_edge *e;
4834 data_reference_p dra = DDR_A (ddr);
4835 data_reference_p drb = DDR_B (ddr);
4836 unsigned level = ddr_dependence_level (ddr);
4838 /* For non scalar dependences, when the dependence is REVERSED,
4839 statement B has to be executed before statement A. */
4841 && !DDR_REVERSED_P (ddr))
4843 data_reference_p tmp = dra;
4848 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4849 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4851 if (va < 0 || vb < 0)
4854 e = add_edge (rdg, va, vb);
4855 e->data = XNEW (struct rdg_edge);
4857 RDGE_LEVEL (e) = level;
4858 RDGE_RELATION (e) = ddr;
4860 /* Determines the type of the data dependence. */
4861 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4862 RDGE_TYPE (e) = input_dd;
4863 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4864 RDGE_TYPE (e) = output_dd;
4865 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4866 RDGE_TYPE (e) = flow_dd;
4867 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4868 RDGE_TYPE (e) = anti_dd;
4871 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4872 the index of DEF in RDG. */
4875 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4877 use_operand_p imm_use_p;
4878 imm_use_iterator iterator;
4880 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4882 struct graph_edge *e;
4883 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4888 e = add_edge (rdg, idef, use);
4889 e->data = XNEW (struct rdg_edge);
4890 RDGE_TYPE (e) = flow_dd;
4891 RDGE_RELATION (e) = NULL;
4895 /* Creates the edges of the reduced dependence graph RDG. */
4898 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4901 struct data_dependence_relation *ddr;
4902 def_operand_p def_p;
4905 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4906 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4907 create_rdg_edge_for_ddr (rdg, ddr);
4909 for (i = 0; i < rdg->n_vertices; i++)
4910 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4912 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4915 /* Build the vertices of the reduced dependence graph RDG. */
4918 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4923 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4925 VEC (data_ref_loc, heap) *references;
4927 struct vertex *v = &(rdg->vertices[i]);
4928 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4929 struct rdg_vertex_info **slot;
4933 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4940 v->data = XNEW (struct rdg_vertex);
4941 RDG_STMT (rdg, i) = stmt;
4943 RDG_MEM_WRITE_STMT (rdg, i) = false;
4944 RDG_MEM_READS_STMT (rdg, i) = false;
4945 if (gimple_code (stmt) == GIMPLE_PHI)
4948 get_references_in_stmt (stmt, &references);
4949 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4951 RDG_MEM_WRITE_STMT (rdg, i) = true;
4953 RDG_MEM_READS_STMT (rdg, i) = true;
4955 VEC_free (data_ref_loc, heap, references);
4959 /* Initialize STMTS with all the statements of LOOP. When
4960 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4961 which we discover statements is important as
4962 generate_loops_for_partition is using the same traversal for
4963 identifying statements. */
4966 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4969 basic_block *bbs = get_loop_body_in_dom_order (loop);
4971 for (i = 0; i < loop->num_nodes; i++)
4973 basic_block bb = bbs[i];
4974 gimple_stmt_iterator bsi;
4977 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4978 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4980 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4982 stmt = gsi_stmt (bsi);
4983 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
4984 VEC_safe_push (gimple, heap, *stmts, stmt);
4991 /* Returns true when all the dependences are computable. */
4994 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4999 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5000 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5006 /* Computes a hash function for element ELT. */
5009 hash_stmt_vertex_info (const void *elt)
5011 const struct rdg_vertex_info *const rvi =
5012 (const struct rdg_vertex_info *) elt;
5013 gimple stmt = rvi->stmt;
5015 return htab_hash_pointer (stmt);
5018 /* Compares database elements E1 and E2. */
5021 eq_stmt_vertex_info (const void *e1, const void *e2)
5023 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5024 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5026 return elt1->stmt == elt2->stmt;
5029 /* Free the element E. */
5032 hash_stmt_vertex_del (void *e)
5037 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5038 statement of the loop nest, and one edge per data dependence or
5039 scalar dependence. */
5042 build_empty_rdg (int n_stmts)
5044 int nb_data_refs = 10;
5045 struct graph *rdg = new_graph (n_stmts);
5047 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5048 eq_stmt_vertex_info, hash_stmt_vertex_del);
5052 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5053 statement of the loop nest, and one edge per data dependence or
5054 scalar dependence. */
5057 build_rdg (struct loop *loop,
5058 VEC (loop_p, heap) **loop_nest,
5059 VEC (ddr_p, heap) **dependence_relations,
5060 VEC (data_reference_p, heap) **datarefs)
5062 struct graph *rdg = NULL;
5063 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5065 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5066 dependence_relations);
5068 if (known_dependences_p (*dependence_relations))
5070 stmts_from_loop (loop, &stmts);
5071 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5072 create_rdg_vertices (rdg, stmts);
5073 create_rdg_edges (rdg, *dependence_relations);
5076 VEC_free (gimple, heap, stmts);
5080 /* Free the reduced dependence graph RDG. */
5083 free_rdg (struct graph *rdg)
5087 for (i = 0; i < rdg->n_vertices; i++)
5089 struct vertex *v = &(rdg->vertices[i]);
5090 struct graph_edge *e;
5092 for (e = v->succ; e; e = e->succ_next)
5098 htab_delete (rdg->indices);
5102 /* Initialize STMTS with all the statements of LOOP that contain a
5106 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5109 basic_block *bbs = get_loop_body_in_dom_order (loop);
5111 for (i = 0; i < loop->num_nodes; i++)
5113 basic_block bb = bbs[i];
5114 gimple_stmt_iterator bsi;
5116 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5117 if (gimple_vdef (gsi_stmt (bsi)))
5118 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5124 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5125 that contains a data reference on its LHS with a stride of the same
5126 size as its unit type. */
5129 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5133 struct data_reference *dr;
5136 || !gimple_vdef (stmt)
5137 || !is_gimple_assign (stmt)
5138 || !gimple_assign_single_p (stmt)
5139 || !(op1 = gimple_assign_rhs1 (stmt))
5140 || !(integer_zerop (op1) || real_zerop (op1)))
5143 dr = XCNEW (struct data_reference);
5144 op0 = gimple_assign_lhs (stmt);
5146 DR_STMT (dr) = stmt;
5149 res = dr_analyze_innermost (dr)
5150 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5156 /* Initialize STMTS with all the statements of LOOP that contain a
5157 store to memory of the form "A[i] = 0". */
5160 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5164 gimple_stmt_iterator si;
5166 basic_block *bbs = get_loop_body_in_dom_order (loop);
5168 for (i = 0; i < loop->num_nodes; i++)
5169 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5170 if ((stmt = gsi_stmt (si))
5171 && stmt_with_adjacent_zero_store_dr_p (stmt))
5172 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5177 /* For a data reference REF, return the declaration of its base
5178 address or NULL_TREE if the base is not determined. */
5181 ref_base_address (gimple stmt, data_ref_loc *ref)
5183 tree base = NULL_TREE;
5185 struct data_reference *dr = XCNEW (struct data_reference);
5187 DR_STMT (dr) = stmt;
5188 DR_REF (dr) = *ref->pos;
5189 dr_analyze_innermost (dr);
5190 base_address = DR_BASE_ADDRESS (dr);
5195 switch (TREE_CODE (base_address))
5198 base = TREE_OPERAND (base_address, 0);
5202 base = base_address;
5211 /* Determines whether the statement from vertex V of the RDG has a
5212 definition used outside the loop that contains this statement. */
5215 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5217 gimple stmt = RDG_STMT (rdg, v);
5218 struct loop *loop = loop_containing_stmt (stmt);
5219 use_operand_p imm_use_p;
5220 imm_use_iterator iterator;
5222 def_operand_p def_p;
5227 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5229 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5231 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5239 /* Determines whether statements S1 and S2 access to similar memory
5240 locations. Two memory accesses are considered similar when they
5241 have the same base address declaration, i.e. when their
5242 ref_base_address is the same. */
5245 have_similar_memory_accesses (gimple s1, gimple s2)
5249 VEC (data_ref_loc, heap) *refs1, *refs2;
5250 data_ref_loc *ref1, *ref2;
5252 get_references_in_stmt (s1, &refs1);
5253 get_references_in_stmt (s2, &refs2);
5255 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5257 tree base1 = ref_base_address (s1, ref1);
5260 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5261 if (base1 == ref_base_address (s2, ref2))
5269 VEC_free (data_ref_loc, heap, refs1);
5270 VEC_free (data_ref_loc, heap, refs2);
5274 /* Helper function for the hashtab. */
5277 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5279 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5280 CONST_CAST_GIMPLE ((const_gimple) s2));
5283 /* Helper function for the hashtab. */
5286 ref_base_address_1 (const void *s)
5288 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5290 VEC (data_ref_loc, heap) *refs;
5294 get_references_in_stmt (stmt, &refs);
5296 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5299 res = htab_hash_pointer (ref_base_address (stmt, ref));
5303 VEC_free (data_ref_loc, heap, refs);
5307 /* Try to remove duplicated write data references from STMTS. */
5310 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5314 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5315 have_similar_memory_accesses_1, NULL);
5317 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5321 slot = htab_find_slot (seen, stmt, INSERT);
5324 VEC_ordered_remove (gimple, *stmts, i);
5327 *slot = (void *) stmt;
5335 /* Returns the index of PARAMETER in the parameters vector of the
5336 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5339 access_matrix_get_index_for_parameter (tree parameter,
5340 struct access_matrix *access_matrix)
5343 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5344 tree lambda_parameter;
5346 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5347 if (lambda_parameter == parameter)
5348 return i + AM_NB_INDUCTION_VARS (access_matrix);