1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
50 - to define an interface to access this data.
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
63 has an integer solution x = 1 and y = -1.
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
88 static struct datadep_stats
90 int num_dependence_tests;
91 int num_dependence_dependent;
92 int num_dependence_independent;
93 int num_dependence_undetermined;
95 int num_subscript_tests;
96 int num_subscript_undetermined;
97 int num_same_subscript_function;
100 int num_ziv_independent;
101 int num_ziv_dependent;
102 int num_ziv_unimplemented;
105 int num_siv_independent;
106 int num_siv_dependent;
107 int num_siv_unimplemented;
110 int num_miv_independent;
111 int num_miv_dependent;
112 int num_miv_unimplemented;
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
116 struct data_reference *,
117 struct data_reference *,
119 /* Returns true iff A divides B. */
122 tree_fold_divides_p (const_tree a, const_tree b)
124 gcc_assert (TREE_CODE (a) == INTEGER_CST);
125 gcc_assert (TREE_CODE (b) == INTEGER_CST);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
129 /* Returns true iff A divides B. */
132 int_divides_p (int a, int b)
134 return ((b % a) == 0);
139 /* Dump into FILE all the data references from DATAREFS. */
142 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
145 struct data_reference *dr;
147 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
148 dump_data_reference (file, dr);
151 /* Dump into STDERR all the data references from DATAREFS. */
154 debug_data_references (VEC (data_reference_p, heap) *datarefs)
156 dump_data_references (stderr, datarefs);
159 /* Dump to STDERR all the dependence relations from DDRS. */
162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 dump_data_dependence_relations (stderr, ddrs);
167 /* Dump into FILE all the dependence relations from DDRS. */
170 dump_data_dependence_relations (FILE *file,
171 VEC (ddr_p, heap) *ddrs)
174 struct data_dependence_relation *ddr;
176 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
177 dump_data_dependence_relation (file, ddr);
180 /* Print to STDERR the data_reference DR. */
183 debug_data_reference (struct data_reference *dr)
185 dump_data_reference (stderr, dr);
188 /* Dump function for a DATA_REFERENCE structure. */
191 dump_data_reference (FILE *outf,
192 struct data_reference *dr)
196 fprintf (outf, "#(Data Ref: \n");
197 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
198 fprintf (outf, "# stmt: ");
199 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
200 fprintf (outf, "# ref: ");
201 print_generic_stmt (outf, DR_REF (dr), 0);
202 fprintf (outf, "# base_object: ");
203 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
205 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
207 fprintf (outf, "# Access function %d: ", i);
208 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
210 fprintf (outf, "#)\n");
213 /* Dumps the affine function described by FN to the file OUTF. */
216 dump_affine_function (FILE *outf, affine_fn fn)
221 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
222 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
224 fprintf (outf, " + ");
225 print_generic_expr (outf, coef, TDF_SLIM);
226 fprintf (outf, " * x_%u", i);
230 /* Dumps the conflict function CF to the file OUTF. */
233 dump_conflict_function (FILE *outf, conflict_function *cf)
237 if (cf->n == NO_DEPENDENCE)
238 fprintf (outf, "no dependence\n");
239 else if (cf->n == NOT_KNOWN)
240 fprintf (outf, "not known\n");
243 for (i = 0; i < cf->n; i++)
246 dump_affine_function (outf, cf->fns[i]);
247 fprintf (outf, "]\n");
252 /* Dump function for a SUBSCRIPT structure. */
255 dump_subscript (FILE *outf, struct subscript *subscript)
257 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
259 fprintf (outf, "\n (subscript \n");
260 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
261 dump_conflict_function (outf, cf);
262 if (CF_NONTRIVIAL_P (cf))
264 tree last_iteration = SUB_LAST_CONFLICT (subscript);
265 fprintf (outf, " last_conflict: ");
266 print_generic_stmt (outf, last_iteration, 0);
269 cf = SUB_CONFLICTS_IN_B (subscript);
270 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
271 dump_conflict_function (outf, cf);
272 if (CF_NONTRIVIAL_P (cf))
274 tree last_iteration = SUB_LAST_CONFLICT (subscript);
275 fprintf (outf, " last_conflict: ");
276 print_generic_stmt (outf, last_iteration, 0);
279 fprintf (outf, " (Subscript distance: ");
280 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
281 fprintf (outf, " )\n");
282 fprintf (outf, " )\n");
285 /* Print the classic direction vector DIRV to OUTF. */
288 print_direction_vector (FILE *outf,
294 for (eq = 0; eq < length; eq++)
296 enum data_dependence_direction dir = ((enum data_dependence_direction)
302 fprintf (outf, " +");
305 fprintf (outf, " -");
308 fprintf (outf, " =");
310 case dir_positive_or_equal:
311 fprintf (outf, " +=");
313 case dir_positive_or_negative:
314 fprintf (outf, " +-");
316 case dir_negative_or_equal:
317 fprintf (outf, " -=");
320 fprintf (outf, " *");
323 fprintf (outf, "indep");
327 fprintf (outf, "\n");
330 /* Print a vector of direction vectors. */
333 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
339 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
340 print_direction_vector (outf, v, length);
343 /* Print out a vector VEC of length N to OUTFILE. */
346 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
350 for (i = 0; i < n; i++)
351 fprintf (outfile, "%3d ", vector[i]);
352 fprintf (outfile, "\n");
355 /* Print a vector of distance vectors. */
358 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
364 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
365 print_lambda_vector (outf, v, length);
371 debug_data_dependence_relation (struct data_dependence_relation *ddr)
373 dump_data_dependence_relation (stderr, ddr);
376 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
379 dump_data_dependence_relation (FILE *outf,
380 struct data_dependence_relation *ddr)
382 struct data_reference *dra, *drb;
384 fprintf (outf, "(Data Dep: \n");
386 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
393 dump_data_reference (outf, dra);
395 fprintf (outf, " (nil)\n");
397 dump_data_reference (outf, drb);
399 fprintf (outf, " (nil)\n");
401 fprintf (outf, " (don't know)\n)\n");
407 dump_data_reference (outf, dra);
408 dump_data_reference (outf, drb);
410 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
411 fprintf (outf, " (no dependence)\n");
413 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
418 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
420 fprintf (outf, " access_fn_A: ");
421 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
422 fprintf (outf, " access_fn_B: ");
423 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
424 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
427 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
428 fprintf (outf, " loop nest: (");
429 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
430 fprintf (outf, "%d ", loopi->num);
431 fprintf (outf, ")\n");
433 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
435 fprintf (outf, " distance_vector: ");
436 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
440 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
442 fprintf (outf, " direction_vector: ");
443 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
448 fprintf (outf, ")\n");
451 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
454 dump_data_dependence_direction (FILE *file,
455 enum data_dependence_direction dir)
471 case dir_positive_or_negative:
472 fprintf (file, "+-");
475 case dir_positive_or_equal:
476 fprintf (file, "+=");
479 case dir_negative_or_equal:
480 fprintf (file, "-=");
492 /* Dumps the distance and direction vectors in FILE. DDRS contains
493 the dependence relations, and VECT_SIZE is the size of the
494 dependence vectors, or in other words the number of loops in the
498 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
501 struct data_dependence_relation *ddr;
504 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
505 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
507 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
509 fprintf (file, "DISTANCE_V (");
510 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
511 fprintf (file, ")\n");
514 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
516 fprintf (file, "DIRECTION_V (");
517 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
518 fprintf (file, ")\n");
522 fprintf (file, "\n\n");
525 /* Dumps the data dependence relations DDRS in FILE. */
528 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
531 struct data_dependence_relation *ddr;
533 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
534 dump_data_dependence_relation (file, ddr);
536 fprintf (file, "\n\n");
539 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
540 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
541 constant of type ssizetype, and returns true. If we cannot do this
542 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
546 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
547 tree *var, tree *off)
551 enum tree_code ocode = code;
559 *var = build_int_cst (type, 0);
560 *off = fold_convert (ssizetype, op0);
563 case POINTER_PLUS_EXPR:
568 split_constant_offset (op0, &var0, &off0);
569 split_constant_offset (op1, &var1, &off1);
570 *var = fold_build2 (code, type, var0, var1);
571 *off = size_binop (ocode, off0, off1);
575 if (TREE_CODE (op1) != INTEGER_CST)
578 split_constant_offset (op0, &var0, &off0);
579 *var = fold_build2 (MULT_EXPR, type, var0, op1);
580 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
586 HOST_WIDE_INT pbitsize, pbitpos;
587 enum machine_mode pmode;
588 int punsignedp, pvolatilep;
590 op0 = TREE_OPERAND (op0, 0);
591 if (!handled_component_p (op0))
594 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
595 &pmode, &punsignedp, &pvolatilep, false);
597 if (pbitpos % BITS_PER_UNIT != 0)
599 base = build_fold_addr_expr (base);
600 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
604 split_constant_offset (poffset, &poffset, &off1);
605 off0 = size_binop (PLUS_EXPR, off0, off1);
606 if (POINTER_TYPE_P (TREE_TYPE (base)))
607 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
608 base, fold_convert (sizetype, poffset));
610 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
611 fold_convert (TREE_TYPE (base), poffset));
614 var0 = fold_convert (type, base);
616 /* If variable length types are involved, punt, otherwise casts
617 might be converted into ARRAY_REFs in gimplify_conversion.
618 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
619 possibly no longer appears in current GIMPLE, might resurface.
620 This perhaps could run
621 if (CONVERT_EXPR_P (var0))
623 gimplify_conversion (&var0);
624 // Attempt to fill in any within var0 found ARRAY_REF's
625 // element size from corresponding op embedded ARRAY_REF,
626 // if unsuccessful, just punt.
628 while (POINTER_TYPE_P (type))
629 type = TREE_TYPE (type);
630 if (int_size_in_bytes (type) < 0)
640 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
641 enum tree_code subcode;
643 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
646 var0 = gimple_assign_rhs1 (def_stmt);
647 subcode = gimple_assign_rhs_code (def_stmt);
648 var1 = gimple_assign_rhs2 (def_stmt);
650 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
654 /* We must not introduce undefined overflow, and we must not change the value.
655 Hence we're okay if the inner type doesn't overflow to start with
656 (pointer or signed), the outer type also is an integer or pointer
657 and the outer precision is at least as large as the inner. */
658 tree itype = TREE_TYPE (op0);
659 if ((POINTER_TYPE_P (itype)
660 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
661 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
662 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
664 split_constant_offset (op0, &var0, off);
665 *var = fold_convert (type, var0);
676 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
677 will be ssizetype. */
680 split_constant_offset (tree exp, tree *var, tree *off)
682 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
686 *off = ssize_int (0);
689 if (tree_is_chrec (exp))
692 otype = TREE_TYPE (exp);
693 code = TREE_CODE (exp);
694 extract_ops_from_tree (exp, &code, &op0, &op1);
695 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
697 *var = fold_convert (type, e);
702 /* Returns the address ADDR of an object in a canonical shape (without nop
703 casts, and with type of pointer to the object). */
706 canonicalize_base_object_address (tree addr)
712 /* The base address may be obtained by casting from integer, in that case
714 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
717 if (TREE_CODE (addr) != ADDR_EXPR)
720 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
723 /* Analyzes the behavior of the memory reference DR in the innermost loop or
724 basic block that contains it. Returns true if analysis succeed or false
728 dr_analyze_innermost (struct data_reference *dr)
730 gimple stmt = DR_STMT (dr);
731 struct loop *loop = loop_containing_stmt (stmt);
732 tree ref = DR_REF (dr);
733 HOST_WIDE_INT pbitsize, pbitpos;
735 enum machine_mode pmode;
736 int punsignedp, pvolatilep;
737 affine_iv base_iv, offset_iv;
738 tree init, dinit, step;
739 bool in_loop = (loop && loop->num);
741 if (dump_file && (dump_flags & TDF_DETAILS))
742 fprintf (dump_file, "analyze_innermost: ");
744 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
745 &pmode, &punsignedp, &pvolatilep, false);
746 gcc_assert (base != NULL_TREE);
748 if (pbitpos % BITS_PER_UNIT != 0)
750 if (dump_file && (dump_flags & TDF_DETAILS))
751 fprintf (dump_file, "failed: bit offset alignment.\n");
755 if (TREE_CODE (base) == MEM_REF)
757 if (!integer_zerop (TREE_OPERAND (base, 1)))
761 double_int moff = mem_ref_offset (base);
762 poffset = double_int_to_tree (sizetype, moff);
765 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
767 base = TREE_OPERAND (base, 0);
770 base = build_fold_addr_expr (base);
773 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
776 if (dump_file && (dump_flags & TDF_DETAILS))
777 fprintf (dump_file, "failed: evolution of base is not affine.\n");
784 base_iv.step = ssize_int (0);
785 base_iv.no_overflow = true;
790 offset_iv.base = ssize_int (0);
791 offset_iv.step = ssize_int (0);
797 offset_iv.base = poffset;
798 offset_iv.step = ssize_int (0);
800 else if (!simple_iv (loop, loop_containing_stmt (stmt),
801 poffset, &offset_iv, false))
803 if (dump_file && (dump_flags & TDF_DETAILS))
804 fprintf (dump_file, "failed: evolution of offset is not"
810 init = ssize_int (pbitpos / BITS_PER_UNIT);
811 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
812 init = size_binop (PLUS_EXPR, init, dinit);
813 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
814 init = size_binop (PLUS_EXPR, init, dinit);
816 step = size_binop (PLUS_EXPR,
817 fold_convert (ssizetype, base_iv.step),
818 fold_convert (ssizetype, offset_iv.step));
820 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
822 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
826 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
828 if (dump_file && (dump_flags & TDF_DETAILS))
829 fprintf (dump_file, "success.\n");
834 /* Determines the base object and the list of indices of memory reference
835 DR, analyzed in LOOP and instantiated in loop nest NEST. */
838 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
840 VEC (tree, heap) *access_fns = NULL;
841 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
842 tree base, off, access_fn = NULL_TREE;
843 basic_block before_loop = NULL;
846 before_loop = block_before_loop (nest);
848 while (handled_component_p (aref))
850 if (TREE_CODE (aref) == ARRAY_REF)
852 op = TREE_OPERAND (aref, 1);
855 access_fn = analyze_scalar_evolution (loop, op);
856 access_fn = instantiate_scev (before_loop, loop, access_fn);
857 VEC_safe_push (tree, heap, access_fns, access_fn);
860 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
863 aref = TREE_OPERAND (aref, 0);
867 && (INDIRECT_REF_P (aref)
868 || TREE_CODE (aref) == MEM_REF))
870 op = TREE_OPERAND (aref, 0);
871 access_fn = analyze_scalar_evolution (loop, op);
872 access_fn = instantiate_scev (before_loop, loop, access_fn);
873 base = initial_condition (access_fn);
874 split_constant_offset (base, &base, &off);
875 if (TREE_CODE (aref) == MEM_REF)
876 off = size_binop (PLUS_EXPR, off,
877 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
878 access_fn = chrec_replace_initial_condition (access_fn,
879 fold_convert (TREE_TYPE (base), off));
881 TREE_OPERAND (aref, 0) = base;
882 VEC_safe_push (tree, heap, access_fns, access_fn);
885 if (TREE_CODE (aref) == MEM_REF)
886 TREE_OPERAND (aref, 1)
887 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
889 if (TREE_CODE (ref) == MEM_REF
890 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
891 && integer_zerop (TREE_OPERAND (ref, 1)))
892 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
894 /* For canonicalization purposes we'd like to strip all outermost
895 zero-offset component-refs.
896 ??? For now simply handle zero-index array-refs. */
897 while (TREE_CODE (ref) == ARRAY_REF
898 && integer_zerop (TREE_OPERAND (ref, 1)))
899 ref = TREE_OPERAND (ref, 0);
901 DR_BASE_OBJECT (dr) = ref;
902 DR_ACCESS_FNS (dr) = access_fns;
905 /* Extracts the alias analysis information from the memory reference DR. */
908 dr_analyze_alias (struct data_reference *dr)
910 tree ref = DR_REF (dr);
911 tree base = get_base_address (ref), addr;
913 if (INDIRECT_REF_P (base)
914 || TREE_CODE (base) == MEM_REF)
916 addr = TREE_OPERAND (base, 0);
917 if (TREE_CODE (addr) == SSA_NAME)
918 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
922 /* Returns true if the address of DR is invariant. */
925 dr_address_invariant_p (struct data_reference *dr)
930 FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
931 if (tree_contains_chrecs (idx, NULL))
937 /* Frees data reference DR. */
940 free_data_ref (data_reference_p dr)
942 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
946 /* Analyzes memory reference MEMREF accessed in STMT. The reference
947 is read if IS_READ is true, write otherwise. Returns the
948 data_reference description of MEMREF. NEST is the outermost loop
949 in which the reference should be instantiated, LOOP is the loop in
950 which the data reference should be analyzed. */
952 struct data_reference *
953 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
956 struct data_reference *dr;
958 if (dump_file && (dump_flags & TDF_DETAILS))
960 fprintf (dump_file, "Creating dr for ");
961 print_generic_expr (dump_file, memref, TDF_SLIM);
962 fprintf (dump_file, "\n");
965 dr = XCNEW (struct data_reference);
967 DR_REF (dr) = memref;
968 DR_IS_READ (dr) = is_read;
970 dr_analyze_innermost (dr);
971 dr_analyze_indices (dr, nest, loop);
972 dr_analyze_alias (dr);
974 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "\tbase_address: ");
977 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
978 fprintf (dump_file, "\n\toffset from base address: ");
979 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
980 fprintf (dump_file, "\n\tconstant offset from base address: ");
981 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
982 fprintf (dump_file, "\n\tstep: ");
983 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
984 fprintf (dump_file, "\n\taligned to: ");
985 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
986 fprintf (dump_file, "\n\tbase_object: ");
987 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
988 fprintf (dump_file, "\n");
994 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
997 dr_equal_offsets_p1 (tree offset1, tree offset2)
1001 STRIP_NOPS (offset1);
1002 STRIP_NOPS (offset2);
1004 if (offset1 == offset2)
1007 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1008 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1011 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1012 TREE_OPERAND (offset2, 0));
1014 if (!res || !BINARY_CLASS_P (offset1))
1017 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1018 TREE_OPERAND (offset2, 1));
1023 /* Check if DRA and DRB have equal offsets. */
1025 dr_equal_offsets_p (struct data_reference *dra,
1026 struct data_reference *drb)
1028 tree offset1, offset2;
1030 offset1 = DR_OFFSET (dra);
1031 offset2 = DR_OFFSET (drb);
1033 return dr_equal_offsets_p1 (offset1, offset2);
1036 /* Returns true if FNA == FNB. */
1039 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1041 unsigned i, n = VEC_length (tree, fna);
1043 if (n != VEC_length (tree, fnb))
1046 for (i = 0; i < n; i++)
1047 if (!operand_equal_p (VEC_index (tree, fna, i),
1048 VEC_index (tree, fnb, i), 0))
1054 /* If all the functions in CF are the same, returns one of them,
1055 otherwise returns NULL. */
1058 common_affine_function (conflict_function *cf)
1063 if (!CF_NONTRIVIAL_P (cf))
1068 for (i = 1; i < cf->n; i++)
1069 if (!affine_function_equal_p (comm, cf->fns[i]))
1075 /* Returns the base of the affine function FN. */
1078 affine_function_base (affine_fn fn)
1080 return VEC_index (tree, fn, 0);
1083 /* Returns true if FN is a constant. */
1086 affine_function_constant_p (affine_fn fn)
1091 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1092 if (!integer_zerop (coef))
1098 /* Returns true if FN is the zero constant function. */
1101 affine_function_zero_p (affine_fn fn)
1103 return (integer_zerop (affine_function_base (fn))
1104 && affine_function_constant_p (fn));
1107 /* Returns a signed integer type with the largest precision from TA
1111 signed_type_for_types (tree ta, tree tb)
1113 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1114 return signed_type_for (ta);
1116 return signed_type_for (tb);
1119 /* Applies operation OP on affine functions FNA and FNB, and returns the
1123 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1129 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1131 n = VEC_length (tree, fna);
1132 m = VEC_length (tree, fnb);
1136 n = VEC_length (tree, fnb);
1137 m = VEC_length (tree, fna);
1140 ret = VEC_alloc (tree, heap, m);
1141 for (i = 0; i < n; i++)
1143 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1144 TREE_TYPE (VEC_index (tree, fnb, i)));
1146 VEC_quick_push (tree, ret,
1147 fold_build2 (op, type,
1148 VEC_index (tree, fna, i),
1149 VEC_index (tree, fnb, i)));
1152 for (; VEC_iterate (tree, fna, i, coef); i++)
1153 VEC_quick_push (tree, ret,
1154 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1155 coef, integer_zero_node));
1156 for (; VEC_iterate (tree, fnb, i, coef); i++)
1157 VEC_quick_push (tree, ret,
1158 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1159 integer_zero_node, coef));
1164 /* Returns the sum of affine functions FNA and FNB. */
1167 affine_fn_plus (affine_fn fna, affine_fn fnb)
1169 return affine_fn_op (PLUS_EXPR, fna, fnb);
1172 /* Returns the difference of affine functions FNA and FNB. */
1175 affine_fn_minus (affine_fn fna, affine_fn fnb)
1177 return affine_fn_op (MINUS_EXPR, fna, fnb);
1180 /* Frees affine function FN. */
1183 affine_fn_free (affine_fn fn)
1185 VEC_free (tree, heap, fn);
1188 /* Determine for each subscript in the data dependence relation DDR
1192 compute_subscript_distance (struct data_dependence_relation *ddr)
1194 conflict_function *cf_a, *cf_b;
1195 affine_fn fn_a, fn_b, diff;
1197 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1201 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1203 struct subscript *subscript;
1205 subscript = DDR_SUBSCRIPT (ddr, i);
1206 cf_a = SUB_CONFLICTS_IN_A (subscript);
1207 cf_b = SUB_CONFLICTS_IN_B (subscript);
1209 fn_a = common_affine_function (cf_a);
1210 fn_b = common_affine_function (cf_b);
1213 SUB_DISTANCE (subscript) = chrec_dont_know;
1216 diff = affine_fn_minus (fn_a, fn_b);
1218 if (affine_function_constant_p (diff))
1219 SUB_DISTANCE (subscript) = affine_function_base (diff);
1221 SUB_DISTANCE (subscript) = chrec_dont_know;
1223 affine_fn_free (diff);
1228 /* Returns the conflict function for "unknown". */
1230 static conflict_function *
1231 conflict_fn_not_known (void)
1233 conflict_function *fn = XCNEW (conflict_function);
1239 /* Returns the conflict function for "independent". */
1241 static conflict_function *
1242 conflict_fn_no_dependence (void)
1244 conflict_function *fn = XCNEW (conflict_function);
1245 fn->n = NO_DEPENDENCE;
1250 /* Returns true if the address of OBJ is invariant in LOOP. */
1253 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1255 while (handled_component_p (obj))
1257 if (TREE_CODE (obj) == ARRAY_REF)
1259 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1260 need to check the stride and the lower bound of the reference. */
1261 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1263 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1267 else if (TREE_CODE (obj) == COMPONENT_REF)
1269 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1273 obj = TREE_OPERAND (obj, 0);
1276 if (!INDIRECT_REF_P (obj)
1277 && TREE_CODE (obj) != MEM_REF)
1280 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1284 /* Returns false if we can prove that data references A and B do not alias,
1288 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1290 tree addr_a = DR_BASE_OBJECT (a);
1291 tree addr_b = DR_BASE_OBJECT (b);
1293 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1294 return refs_output_dependent_p (addr_a, addr_b);
1295 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1296 return refs_anti_dependent_p (addr_a, addr_b);
1297 return refs_may_alias_p (addr_a, addr_b);
1300 static void compute_self_dependence (struct data_dependence_relation *);
1302 /* Initialize a data dependence relation between data accesses A and
1303 B. NB_LOOPS is the number of loops surrounding the references: the
1304 size of the classic distance/direction vectors. */
1306 static struct data_dependence_relation *
1307 initialize_data_dependence_relation (struct data_reference *a,
1308 struct data_reference *b,
1309 VEC (loop_p, heap) *loop_nest)
1311 struct data_dependence_relation *res;
1314 res = XNEW (struct data_dependence_relation);
1317 DDR_LOOP_NEST (res) = NULL;
1318 DDR_REVERSED_P (res) = false;
1319 DDR_SUBSCRIPTS (res) = NULL;
1320 DDR_DIR_VECTS (res) = NULL;
1321 DDR_DIST_VECTS (res) = NULL;
1323 if (a == NULL || b == NULL)
1325 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1329 /* If the data references do not alias, then they are independent. */
1330 if (!dr_may_alias_p (a, b))
1332 DDR_ARE_DEPENDENT (res) = chrec_known;
1336 /* When the references are exactly the same, don't spend time doing
1337 the data dependence tests, just initialize the ddr and return. */
1338 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1340 DDR_AFFINE_P (res) = true;
1341 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1342 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1343 DDR_LOOP_NEST (res) = loop_nest;
1344 DDR_INNER_LOOP (res) = 0;
1345 DDR_SELF_REFERENCE (res) = true;
1346 compute_self_dependence (res);
1350 /* If the references do not access the same object, we do not know
1351 whether they alias or not. */
1352 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1354 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1358 /* If the base of the object is not invariant in the loop nest, we cannot
1359 analyze it. TODO -- in fact, it would suffice to record that there may
1360 be arbitrary dependences in the loops where the base object varies. */
1362 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1363 DR_BASE_OBJECT (a)))
1365 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1369 /* If the number of dimensions of the access to not agree we can have
1370 a pointer access to a component of the array element type and an
1371 array access while the base-objects are still the same. Punt. */
1372 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1374 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1378 DDR_AFFINE_P (res) = true;
1379 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1380 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1381 DDR_LOOP_NEST (res) = loop_nest;
1382 DDR_INNER_LOOP (res) = 0;
1383 DDR_SELF_REFERENCE (res) = false;
1385 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1387 struct subscript *subscript;
1389 subscript = XNEW (struct subscript);
1390 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1391 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1392 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1393 SUB_DISTANCE (subscript) = chrec_dont_know;
1394 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1400 /* Frees memory used by the conflict function F. */
1403 free_conflict_function (conflict_function *f)
1407 if (CF_NONTRIVIAL_P (f))
1409 for (i = 0; i < f->n; i++)
1410 affine_fn_free (f->fns[i]);
1415 /* Frees memory used by SUBSCRIPTS. */
1418 free_subscripts (VEC (subscript_p, heap) *subscripts)
1423 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1425 free_conflict_function (s->conflicting_iterations_in_a);
1426 free_conflict_function (s->conflicting_iterations_in_b);
1429 VEC_free (subscript_p, heap, subscripts);
1432 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1436 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1439 if (dump_file && (dump_flags & TDF_DETAILS))
1441 fprintf (dump_file, "(dependence classified: ");
1442 print_generic_expr (dump_file, chrec, 0);
1443 fprintf (dump_file, ")\n");
1446 DDR_ARE_DEPENDENT (ddr) = chrec;
1447 free_subscripts (DDR_SUBSCRIPTS (ddr));
1448 DDR_SUBSCRIPTS (ddr) = NULL;
1451 /* The dependence relation DDR cannot be represented by a distance
1455 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1457 if (dump_file && (dump_flags & TDF_DETAILS))
1458 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1460 DDR_AFFINE_P (ddr) = false;
1465 /* This section contains the classic Banerjee tests. */
1467 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1468 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1471 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1473 return (evolution_function_is_constant_p (chrec_a)
1474 && evolution_function_is_constant_p (chrec_b));
1477 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1478 variable, i.e., if the SIV (Single Index Variable) test is true. */
1481 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1483 if ((evolution_function_is_constant_p (chrec_a)
1484 && evolution_function_is_univariate_p (chrec_b))
1485 || (evolution_function_is_constant_p (chrec_b)
1486 && evolution_function_is_univariate_p (chrec_a)))
1489 if (evolution_function_is_univariate_p (chrec_a)
1490 && evolution_function_is_univariate_p (chrec_b))
1492 switch (TREE_CODE (chrec_a))
1494 case POLYNOMIAL_CHREC:
1495 switch (TREE_CODE (chrec_b))
1497 case POLYNOMIAL_CHREC:
1498 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1513 /* Creates a conflict function with N dimensions. The affine functions
1514 in each dimension follow. */
1516 static conflict_function *
1517 conflict_fn (unsigned n, ...)
1520 conflict_function *ret = XCNEW (conflict_function);
1523 gcc_assert (0 < n && n <= MAX_DIM);
1527 for (i = 0; i < n; i++)
1528 ret->fns[i] = va_arg (ap, affine_fn);
1534 /* Returns constant affine function with value CST. */
1537 affine_fn_cst (tree cst)
1539 affine_fn fn = VEC_alloc (tree, heap, 1);
1540 VEC_quick_push (tree, fn, cst);
1544 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1547 affine_fn_univar (tree cst, unsigned dim, tree coef)
1549 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1552 gcc_assert (dim > 0);
1553 VEC_quick_push (tree, fn, cst);
1554 for (i = 1; i < dim; i++)
1555 VEC_quick_push (tree, fn, integer_zero_node);
1556 VEC_quick_push (tree, fn, coef);
1560 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1561 *OVERLAPS_B are initialized to the functions that describe the
1562 relation between the elements accessed twice by CHREC_A and
1563 CHREC_B. For k >= 0, the following property is verified:
1565 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1568 analyze_ziv_subscript (tree chrec_a,
1570 conflict_function **overlaps_a,
1571 conflict_function **overlaps_b,
1572 tree *last_conflicts)
1574 tree type, difference;
1575 dependence_stats.num_ziv++;
1577 if (dump_file && (dump_flags & TDF_DETAILS))
1578 fprintf (dump_file, "(analyze_ziv_subscript \n");
1580 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1581 chrec_a = chrec_convert (type, chrec_a, NULL);
1582 chrec_b = chrec_convert (type, chrec_b, NULL);
1583 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1585 switch (TREE_CODE (difference))
1588 if (integer_zerop (difference))
1590 /* The difference is equal to zero: the accessed index
1591 overlaps for each iteration in the loop. */
1592 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1593 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1594 *last_conflicts = chrec_dont_know;
1595 dependence_stats.num_ziv_dependent++;
1599 /* The accesses do not overlap. */
1600 *overlaps_a = conflict_fn_no_dependence ();
1601 *overlaps_b = conflict_fn_no_dependence ();
1602 *last_conflicts = integer_zero_node;
1603 dependence_stats.num_ziv_independent++;
1608 /* We're not sure whether the indexes overlap. For the moment,
1609 conservatively answer "don't know". */
1610 if (dump_file && (dump_flags & TDF_DETAILS))
1611 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1613 *overlaps_a = conflict_fn_not_known ();
1614 *overlaps_b = conflict_fn_not_known ();
1615 *last_conflicts = chrec_dont_know;
1616 dependence_stats.num_ziv_unimplemented++;
1620 if (dump_file && (dump_flags & TDF_DETAILS))
1621 fprintf (dump_file, ")\n");
1624 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1625 and only if it fits to the int type. If this is not the case, or the
1626 bound on the number of iterations of LOOP could not be derived, returns
1630 max_stmt_executions_tree (struct loop *loop)
1635 if (!max_stmt_executions (loop, true, &nit))
1636 return chrec_dont_know;
1638 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1639 if (!double_int_fits_to_tree_p (type, nit))
1640 return chrec_dont_know;
1642 return double_int_to_tree (type, nit);
1645 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1646 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1647 *OVERLAPS_B are initialized to the functions that describe the
1648 relation between the elements accessed twice by CHREC_A and
1649 CHREC_B. For k >= 0, the following property is verified:
1651 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1654 analyze_siv_subscript_cst_affine (tree chrec_a,
1656 conflict_function **overlaps_a,
1657 conflict_function **overlaps_b,
1658 tree *last_conflicts)
1660 bool value0, value1, value2;
1661 tree type, difference, tmp;
1663 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1664 chrec_a = chrec_convert (type, chrec_a, NULL);
1665 chrec_b = chrec_convert (type, chrec_b, NULL);
1666 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1668 if (!chrec_is_positive (initial_condition (difference), &value0))
1670 if (dump_file && (dump_flags & TDF_DETAILS))
1671 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1673 dependence_stats.num_siv_unimplemented++;
1674 *overlaps_a = conflict_fn_not_known ();
1675 *overlaps_b = conflict_fn_not_known ();
1676 *last_conflicts = chrec_dont_know;
1681 if (value0 == false)
1683 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1685 if (dump_file && (dump_flags & TDF_DETAILS))
1686 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1688 *overlaps_a = conflict_fn_not_known ();
1689 *overlaps_b = conflict_fn_not_known ();
1690 *last_conflicts = chrec_dont_know;
1691 dependence_stats.num_siv_unimplemented++;
1700 chrec_b = {10, +, 1}
1703 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1705 HOST_WIDE_INT numiter;
1706 struct loop *loop = get_chrec_loop (chrec_b);
1708 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1709 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1710 fold_build1 (ABS_EXPR, type, difference),
1711 CHREC_RIGHT (chrec_b));
1712 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1713 *last_conflicts = integer_one_node;
1716 /* Perform weak-zero siv test to see if overlap is
1717 outside the loop bounds. */
1718 numiter = max_stmt_executions_int (loop, true);
1721 && compare_tree_int (tmp, numiter) > 0)
1723 free_conflict_function (*overlaps_a);
1724 free_conflict_function (*overlaps_b);
1725 *overlaps_a = conflict_fn_no_dependence ();
1726 *overlaps_b = conflict_fn_no_dependence ();
1727 *last_conflicts = integer_zero_node;
1728 dependence_stats.num_siv_independent++;
1731 dependence_stats.num_siv_dependent++;
1735 /* When the step does not divide the difference, there are
1739 *overlaps_a = conflict_fn_no_dependence ();
1740 *overlaps_b = conflict_fn_no_dependence ();
1741 *last_conflicts = integer_zero_node;
1742 dependence_stats.num_siv_independent++;
1751 chrec_b = {10, +, -1}
1753 In this case, chrec_a will not overlap with chrec_b. */
1754 *overlaps_a = conflict_fn_no_dependence ();
1755 *overlaps_b = conflict_fn_no_dependence ();
1756 *last_conflicts = integer_zero_node;
1757 dependence_stats.num_siv_independent++;
1764 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1766 if (dump_file && (dump_flags & TDF_DETAILS))
1767 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1769 *overlaps_a = conflict_fn_not_known ();
1770 *overlaps_b = conflict_fn_not_known ();
1771 *last_conflicts = chrec_dont_know;
1772 dependence_stats.num_siv_unimplemented++;
1777 if (value2 == false)
1781 chrec_b = {10, +, -1}
1783 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1785 HOST_WIDE_INT numiter;
1786 struct loop *loop = get_chrec_loop (chrec_b);
1788 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1789 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1790 CHREC_RIGHT (chrec_b));
1791 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1792 *last_conflicts = integer_one_node;
1794 /* Perform weak-zero siv test to see if overlap is
1795 outside the loop bounds. */
1796 numiter = max_stmt_executions_int (loop, true);
1799 && compare_tree_int (tmp, numiter) > 0)
1801 free_conflict_function (*overlaps_a);
1802 free_conflict_function (*overlaps_b);
1803 *overlaps_a = conflict_fn_no_dependence ();
1804 *overlaps_b = conflict_fn_no_dependence ();
1805 *last_conflicts = integer_zero_node;
1806 dependence_stats.num_siv_independent++;
1809 dependence_stats.num_siv_dependent++;
1813 /* When the step does not divide the difference, there
1817 *overlaps_a = conflict_fn_no_dependence ();
1818 *overlaps_b = conflict_fn_no_dependence ();
1819 *last_conflicts = integer_zero_node;
1820 dependence_stats.num_siv_independent++;
1830 In this case, chrec_a will not overlap with chrec_b. */
1831 *overlaps_a = conflict_fn_no_dependence ();
1832 *overlaps_b = conflict_fn_no_dependence ();
1833 *last_conflicts = integer_zero_node;
1834 dependence_stats.num_siv_independent++;
1842 /* Helper recursive function for initializing the matrix A. Returns
1843 the initial value of CHREC. */
1846 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1850 switch (TREE_CODE (chrec))
1852 case POLYNOMIAL_CHREC:
1853 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1855 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1856 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1862 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1863 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1865 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1870 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1871 return chrec_convert (chrec_type (chrec), op, NULL);
1876 /* Handle ~X as -1 - X. */
1877 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1878 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1879 build_int_cst (TREE_TYPE (chrec), -1), op);
1891 #define FLOOR_DIV(x,y) ((x) / (y))
1893 /* Solves the special case of the Diophantine equation:
1894 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1896 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1897 number of iterations that loops X and Y run. The overlaps will be
1898 constructed as evolutions in dimension DIM. */
1901 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1902 affine_fn *overlaps_a,
1903 affine_fn *overlaps_b,
1904 tree *last_conflicts, int dim)
1906 if (((step_a > 0 && step_b > 0)
1907 || (step_a < 0 && step_b < 0)))
1909 int step_overlaps_a, step_overlaps_b;
1910 int gcd_steps_a_b, last_conflict, tau2;
1912 gcd_steps_a_b = gcd (step_a, step_b);
1913 step_overlaps_a = step_b / gcd_steps_a_b;
1914 step_overlaps_b = step_a / gcd_steps_a_b;
1918 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1919 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1920 last_conflict = tau2;
1921 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1924 *last_conflicts = chrec_dont_know;
1926 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1927 build_int_cst (NULL_TREE,
1929 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1930 build_int_cst (NULL_TREE,
1936 *overlaps_a = affine_fn_cst (integer_zero_node);
1937 *overlaps_b = affine_fn_cst (integer_zero_node);
1938 *last_conflicts = integer_zero_node;
1942 /* Solves the special case of a Diophantine equation where CHREC_A is
1943 an affine bivariate function, and CHREC_B is an affine univariate
1944 function. For example,
1946 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1948 has the following overlapping functions:
1950 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1951 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1952 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1954 FORNOW: This is a specialized implementation for a case occurring in
1955 a common benchmark. Implement the general algorithm. */
1958 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1959 conflict_function **overlaps_a,
1960 conflict_function **overlaps_b,
1961 tree *last_conflicts)
1963 bool xz_p, yz_p, xyz_p;
1964 int step_x, step_y, step_z;
1965 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1966 affine_fn overlaps_a_xz, overlaps_b_xz;
1967 affine_fn overlaps_a_yz, overlaps_b_yz;
1968 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1969 affine_fn ova1, ova2, ovb;
1970 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1972 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1973 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1974 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1977 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
1978 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
1979 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
1981 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1983 if (dump_file && (dump_flags & TDF_DETAILS))
1984 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1986 *overlaps_a = conflict_fn_not_known ();
1987 *overlaps_b = conflict_fn_not_known ();
1988 *last_conflicts = chrec_dont_know;
1992 niter = MIN (niter_x, niter_z);
1993 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1996 &last_conflicts_xz, 1);
1997 niter = MIN (niter_y, niter_z);
1998 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2001 &last_conflicts_yz, 2);
2002 niter = MIN (niter_x, niter_z);
2003 niter = MIN (niter_y, niter);
2004 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2007 &last_conflicts_xyz, 3);
2009 xz_p = !integer_zerop (last_conflicts_xz);
2010 yz_p = !integer_zerop (last_conflicts_yz);
2011 xyz_p = !integer_zerop (last_conflicts_xyz);
2013 if (xz_p || yz_p || xyz_p)
2015 ova1 = affine_fn_cst (integer_zero_node);
2016 ova2 = affine_fn_cst (integer_zero_node);
2017 ovb = affine_fn_cst (integer_zero_node);
2020 affine_fn t0 = ova1;
2023 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2024 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2025 affine_fn_free (t0);
2026 affine_fn_free (t2);
2027 *last_conflicts = last_conflicts_xz;
2031 affine_fn t0 = ova2;
2034 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2035 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2036 affine_fn_free (t0);
2037 affine_fn_free (t2);
2038 *last_conflicts = last_conflicts_yz;
2042 affine_fn t0 = ova1;
2043 affine_fn t2 = ova2;
2046 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2047 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2048 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2049 affine_fn_free (t0);
2050 affine_fn_free (t2);
2051 affine_fn_free (t4);
2052 *last_conflicts = last_conflicts_xyz;
2054 *overlaps_a = conflict_fn (2, ova1, ova2);
2055 *overlaps_b = conflict_fn (1, ovb);
2059 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2060 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2061 *last_conflicts = integer_zero_node;
2064 affine_fn_free (overlaps_a_xz);
2065 affine_fn_free (overlaps_b_xz);
2066 affine_fn_free (overlaps_a_yz);
2067 affine_fn_free (overlaps_b_yz);
2068 affine_fn_free (overlaps_a_xyz);
2069 affine_fn_free (overlaps_b_xyz);
2072 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2075 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2078 memcpy (vec2, vec1, size * sizeof (*vec1));
2081 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2084 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2089 for (i = 0; i < m; i++)
2090 lambda_vector_copy (mat1[i], mat2[i], n);
2093 /* Store the N x N identity matrix in MAT. */
2096 lambda_matrix_id (lambda_matrix mat, int size)
2100 for (i = 0; i < size; i++)
2101 for (j = 0; j < size; j++)
2102 mat[i][j] = (i == j) ? 1 : 0;
2105 /* Return the first nonzero element of vector VEC1 between START and N.
2106 We must have START <= N. Returns N if VEC1 is the zero vector. */
2109 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2112 while (j < n && vec1[j] == 0)
2117 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2118 R2 = R2 + CONST1 * R1. */
2121 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2128 for (i = 0; i < n; i++)
2129 mat[r2][i] += const1 * mat[r1][i];
2132 /* Swap rows R1 and R2 in matrix MAT. */
2135 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2144 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2145 and store the result in VEC2. */
2148 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2149 int size, int const1)
2154 lambda_vector_clear (vec2, size);
2156 for (i = 0; i < size; i++)
2157 vec2[i] = const1 * vec1[i];
2160 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2163 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2166 lambda_vector_mult_const (vec1, vec2, size, -1);
2169 /* Negate row R1 of matrix MAT which has N columns. */
2172 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2174 lambda_vector_negate (mat[r1], mat[r1], n);
2177 /* Return true if two vectors are equal. */
2180 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2183 for (i = 0; i < size; i++)
2184 if (vec1[i] != vec2[i])
2189 /* Given an M x N integer matrix A, this function determines an M x
2190 M unimodular matrix U, and an M x N echelon matrix S such that
2191 "U.A = S". This decomposition is also known as "right Hermite".
2193 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2194 Restructuring Compilers" Utpal Banerjee. */
2197 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2198 lambda_matrix S, lambda_matrix U)
2202 lambda_matrix_copy (A, S, m, n);
2203 lambda_matrix_id (U, m);
2205 for (j = 0; j < n; j++)
2207 if (lambda_vector_first_nz (S[j], m, i0) < m)
2210 for (i = m - 1; i >= i0; i--)
2212 while (S[i][j] != 0)
2214 int sigma, factor, a, b;
2218 sigma = (a * b < 0) ? -1: 1;
2221 factor = sigma * (a / b);
2223 lambda_matrix_row_add (S, n, i, i-1, -factor);
2224 lambda_matrix_row_exchange (S, i, i-1);
2226 lambda_matrix_row_add (U, m, i, i-1, -factor);
2227 lambda_matrix_row_exchange (U, i, i-1);
2234 /* Determines the overlapping elements due to accesses CHREC_A and
2235 CHREC_B, that are affine functions. This function cannot handle
2236 symbolic evolution functions, ie. when initial conditions are
2237 parameters, because it uses lambda matrices of integers. */
2240 analyze_subscript_affine_affine (tree chrec_a,
2242 conflict_function **overlaps_a,
2243 conflict_function **overlaps_b,
2244 tree *last_conflicts)
2246 unsigned nb_vars_a, nb_vars_b, dim;
2247 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2248 lambda_matrix A, U, S;
2249 struct obstack scratch_obstack;
2251 if (eq_evolutions_p (chrec_a, chrec_b))
2253 /* The accessed index overlaps for each iteration in the
2255 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2256 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2257 *last_conflicts = chrec_dont_know;
2260 if (dump_file && (dump_flags & TDF_DETAILS))
2261 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2263 /* For determining the initial intersection, we have to solve a
2264 Diophantine equation. This is the most time consuming part.
2266 For answering to the question: "Is there a dependence?" we have
2267 to prove that there exists a solution to the Diophantine
2268 equation, and that the solution is in the iteration domain,
2269 i.e. the solution is positive or zero, and that the solution
2270 happens before the upper bound loop.nb_iterations. Otherwise
2271 there is no dependence. This function outputs a description of
2272 the iterations that hold the intersections. */
2274 nb_vars_a = nb_vars_in_chrec (chrec_a);
2275 nb_vars_b = nb_vars_in_chrec (chrec_b);
2277 gcc_obstack_init (&scratch_obstack);
2279 dim = nb_vars_a + nb_vars_b;
2280 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2281 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2282 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2284 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2285 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2286 gamma = init_b - init_a;
2288 /* Don't do all the hard work of solving the Diophantine equation
2289 when we already know the solution: for example,
2292 | gamma = 3 - 3 = 0.
2293 Then the first overlap occurs during the first iterations:
2294 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2298 if (nb_vars_a == 1 && nb_vars_b == 1)
2300 HOST_WIDE_INT step_a, step_b;
2301 HOST_WIDE_INT niter, niter_a, niter_b;
2304 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2305 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2306 niter = MIN (niter_a, niter_b);
2307 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2308 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2310 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2313 *overlaps_a = conflict_fn (1, ova);
2314 *overlaps_b = conflict_fn (1, ovb);
2317 else if (nb_vars_a == 2 && nb_vars_b == 1)
2318 compute_overlap_steps_for_affine_1_2
2319 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2321 else if (nb_vars_a == 1 && nb_vars_b == 2)
2322 compute_overlap_steps_for_affine_1_2
2323 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2327 if (dump_file && (dump_flags & TDF_DETAILS))
2328 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2329 *overlaps_a = conflict_fn_not_known ();
2330 *overlaps_b = conflict_fn_not_known ();
2331 *last_conflicts = chrec_dont_know;
2333 goto end_analyze_subs_aa;
2337 lambda_matrix_right_hermite (A, dim, 1, S, U);
2342 lambda_matrix_row_negate (U, dim, 0);
2344 gcd_alpha_beta = S[0][0];
2346 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2347 but that is a quite strange case. Instead of ICEing, answer
2349 if (gcd_alpha_beta == 0)
2351 *overlaps_a = conflict_fn_not_known ();
2352 *overlaps_b = conflict_fn_not_known ();
2353 *last_conflicts = chrec_dont_know;
2354 goto end_analyze_subs_aa;
2357 /* The classic "gcd-test". */
2358 if (!int_divides_p (gcd_alpha_beta, gamma))
2360 /* The "gcd-test" has determined that there is no integer
2361 solution, i.e. there is no dependence. */
2362 *overlaps_a = conflict_fn_no_dependence ();
2363 *overlaps_b = conflict_fn_no_dependence ();
2364 *last_conflicts = integer_zero_node;
2367 /* Both access functions are univariate. This includes SIV and MIV cases. */
2368 else if (nb_vars_a == 1 && nb_vars_b == 1)
2370 /* Both functions should have the same evolution sign. */
2371 if (((A[0][0] > 0 && -A[1][0] > 0)
2372 || (A[0][0] < 0 && -A[1][0] < 0)))
2374 /* The solutions are given by:
2376 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2379 For a given integer t. Using the following variables,
2381 | i0 = u11 * gamma / gcd_alpha_beta
2382 | j0 = u12 * gamma / gcd_alpha_beta
2389 | y0 = j0 + j1 * t. */
2390 HOST_WIDE_INT i0, j0, i1, j1;
2392 i0 = U[0][0] * gamma / gcd_alpha_beta;
2393 j0 = U[0][1] * gamma / gcd_alpha_beta;
2397 if ((i1 == 0 && i0 < 0)
2398 || (j1 == 0 && j0 < 0))
2400 /* There is no solution.
2401 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2402 falls in here, but for the moment we don't look at the
2403 upper bound of the iteration domain. */
2404 *overlaps_a = conflict_fn_no_dependence ();
2405 *overlaps_b = conflict_fn_no_dependence ();
2406 *last_conflicts = integer_zero_node;
2407 goto end_analyze_subs_aa;
2410 if (i1 > 0 && j1 > 0)
2412 HOST_WIDE_INT niter_a = max_stmt_executions_int
2413 (get_chrec_loop (chrec_a), true);
2414 HOST_WIDE_INT niter_b = max_stmt_executions_int
2415 (get_chrec_loop (chrec_b), true);
2416 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2418 /* (X0, Y0) is a solution of the Diophantine equation:
2419 "chrec_a (X0) = chrec_b (Y0)". */
2420 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2422 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2423 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2425 /* (X1, Y1) is the smallest positive solution of the eq
2426 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2427 first conflict occurs. */
2428 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2429 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2430 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2434 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2435 FLOOR_DIV (niter - j0, j1));
2436 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2438 /* If the overlap occurs outside of the bounds of the
2439 loop, there is no dependence. */
2440 if (x1 >= niter || y1 >= niter)
2442 *overlaps_a = conflict_fn_no_dependence ();
2443 *overlaps_b = conflict_fn_no_dependence ();
2444 *last_conflicts = integer_zero_node;
2445 goto end_analyze_subs_aa;
2448 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2451 *last_conflicts = chrec_dont_know;
2455 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2457 build_int_cst (NULL_TREE, i1)));
2460 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2462 build_int_cst (NULL_TREE, j1)));
2466 /* FIXME: For the moment, the upper bound of the
2467 iteration domain for i and j is not checked. */
2468 if (dump_file && (dump_flags & TDF_DETAILS))
2469 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2470 *overlaps_a = conflict_fn_not_known ();
2471 *overlaps_b = conflict_fn_not_known ();
2472 *last_conflicts = chrec_dont_know;
2477 if (dump_file && (dump_flags & TDF_DETAILS))
2478 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2479 *overlaps_a = conflict_fn_not_known ();
2480 *overlaps_b = conflict_fn_not_known ();
2481 *last_conflicts = chrec_dont_know;
2486 if (dump_file && (dump_flags & TDF_DETAILS))
2487 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2488 *overlaps_a = conflict_fn_not_known ();
2489 *overlaps_b = conflict_fn_not_known ();
2490 *last_conflicts = chrec_dont_know;
2493 end_analyze_subs_aa:
2494 obstack_free (&scratch_obstack, NULL);
2495 if (dump_file && (dump_flags & TDF_DETAILS))
2497 fprintf (dump_file, " (overlaps_a = ");
2498 dump_conflict_function (dump_file, *overlaps_a);
2499 fprintf (dump_file, ")\n (overlaps_b = ");
2500 dump_conflict_function (dump_file, *overlaps_b);
2501 fprintf (dump_file, ")\n");
2502 fprintf (dump_file, ")\n");
2506 /* Returns true when analyze_subscript_affine_affine can be used for
2507 determining the dependence relation between chrec_a and chrec_b,
2508 that contain symbols. This function modifies chrec_a and chrec_b
2509 such that the analysis result is the same, and such that they don't
2510 contain symbols, and then can safely be passed to the analyzer.
2512 Example: The analysis of the following tuples of evolutions produce
2513 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2516 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2517 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2521 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2523 tree diff, type, left_a, left_b, right_b;
2525 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2526 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2527 /* FIXME: For the moment not handled. Might be refined later. */
2530 type = chrec_type (*chrec_a);
2531 left_a = CHREC_LEFT (*chrec_a);
2532 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2533 diff = chrec_fold_minus (type, left_a, left_b);
2535 if (!evolution_function_is_constant_p (diff))
2538 if (dump_file && (dump_flags & TDF_DETAILS))
2539 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2541 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2542 diff, CHREC_RIGHT (*chrec_a));
2543 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2544 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2545 build_int_cst (type, 0),
2550 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2551 *OVERLAPS_B are initialized to the functions that describe the
2552 relation between the elements accessed twice by CHREC_A and
2553 CHREC_B. For k >= 0, the following property is verified:
2555 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2558 analyze_siv_subscript (tree chrec_a,
2560 conflict_function **overlaps_a,
2561 conflict_function **overlaps_b,
2562 tree *last_conflicts,
2565 dependence_stats.num_siv++;
2567 if (dump_file && (dump_flags & TDF_DETAILS))
2568 fprintf (dump_file, "(analyze_siv_subscript \n");
2570 if (evolution_function_is_constant_p (chrec_a)
2571 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2572 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2573 overlaps_a, overlaps_b, last_conflicts);
2575 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2576 && evolution_function_is_constant_p (chrec_b))
2577 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2578 overlaps_b, overlaps_a, last_conflicts);
2580 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2581 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2583 if (!chrec_contains_symbols (chrec_a)
2584 && !chrec_contains_symbols (chrec_b))
2586 analyze_subscript_affine_affine (chrec_a, chrec_b,
2587 overlaps_a, overlaps_b,
2590 if (CF_NOT_KNOWN_P (*overlaps_a)
2591 || CF_NOT_KNOWN_P (*overlaps_b))
2592 dependence_stats.num_siv_unimplemented++;
2593 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2594 || CF_NO_DEPENDENCE_P (*overlaps_b))
2595 dependence_stats.num_siv_independent++;
2597 dependence_stats.num_siv_dependent++;
2599 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2602 analyze_subscript_affine_affine (chrec_a, chrec_b,
2603 overlaps_a, overlaps_b,
2606 if (CF_NOT_KNOWN_P (*overlaps_a)
2607 || CF_NOT_KNOWN_P (*overlaps_b))
2608 dependence_stats.num_siv_unimplemented++;
2609 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2610 || CF_NO_DEPENDENCE_P (*overlaps_b))
2611 dependence_stats.num_siv_independent++;
2613 dependence_stats.num_siv_dependent++;
2616 goto siv_subscript_dontknow;
2621 siv_subscript_dontknow:;
2622 if (dump_file && (dump_flags & TDF_DETAILS))
2623 fprintf (dump_file, "siv test failed: unimplemented.\n");
2624 *overlaps_a = conflict_fn_not_known ();
2625 *overlaps_b = conflict_fn_not_known ();
2626 *last_conflicts = chrec_dont_know;
2627 dependence_stats.num_siv_unimplemented++;
2630 if (dump_file && (dump_flags & TDF_DETAILS))
2631 fprintf (dump_file, ")\n");
2634 /* Returns false if we can prove that the greatest common divisor of the steps
2635 of CHREC does not divide CST, false otherwise. */
2638 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2640 HOST_WIDE_INT cd = 0, val;
2643 if (!host_integerp (cst, 0))
2645 val = tree_low_cst (cst, 0);
2647 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2649 step = CHREC_RIGHT (chrec);
2650 if (!host_integerp (step, 0))
2652 cd = gcd (cd, tree_low_cst (step, 0));
2653 chrec = CHREC_LEFT (chrec);
2656 return val % cd == 0;
2659 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2660 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2661 functions that describe the relation between the elements accessed
2662 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2665 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2668 analyze_miv_subscript (tree chrec_a,
2670 conflict_function **overlaps_a,
2671 conflict_function **overlaps_b,
2672 tree *last_conflicts,
2673 struct loop *loop_nest)
2675 tree type, difference;
2677 dependence_stats.num_miv++;
2678 if (dump_file && (dump_flags & TDF_DETAILS))
2679 fprintf (dump_file, "(analyze_miv_subscript \n");
2681 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2682 chrec_a = chrec_convert (type, chrec_a, NULL);
2683 chrec_b = chrec_convert (type, chrec_b, NULL);
2684 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2686 if (eq_evolutions_p (chrec_a, chrec_b))
2688 /* Access functions are the same: all the elements are accessed
2689 in the same order. */
2690 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2691 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2692 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2693 dependence_stats.num_miv_dependent++;
2696 else if (evolution_function_is_constant_p (difference)
2697 /* For the moment, the following is verified:
2698 evolution_function_is_affine_multivariate_p (chrec_a,
2700 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2702 /* testsuite/.../ssa-chrec-33.c
2703 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2705 The difference is 1, and all the evolution steps are multiples
2706 of 2, consequently there are no overlapping elements. */
2707 *overlaps_a = conflict_fn_no_dependence ();
2708 *overlaps_b = conflict_fn_no_dependence ();
2709 *last_conflicts = integer_zero_node;
2710 dependence_stats.num_miv_independent++;
2713 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2714 && !chrec_contains_symbols (chrec_a)
2715 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2716 && !chrec_contains_symbols (chrec_b))
2718 /* testsuite/.../ssa-chrec-35.c
2719 {0, +, 1}_2 vs. {0, +, 1}_3
2720 the overlapping elements are respectively located at iterations:
2721 {0, +, 1}_x and {0, +, 1}_x,
2722 in other words, we have the equality:
2723 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2726 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2727 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2729 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2730 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2732 analyze_subscript_affine_affine (chrec_a, chrec_b,
2733 overlaps_a, overlaps_b, last_conflicts);
2735 if (CF_NOT_KNOWN_P (*overlaps_a)
2736 || CF_NOT_KNOWN_P (*overlaps_b))
2737 dependence_stats.num_miv_unimplemented++;
2738 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2739 || CF_NO_DEPENDENCE_P (*overlaps_b))
2740 dependence_stats.num_miv_independent++;
2742 dependence_stats.num_miv_dependent++;
2747 /* When the analysis is too difficult, answer "don't know". */
2748 if (dump_file && (dump_flags & TDF_DETAILS))
2749 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2751 *overlaps_a = conflict_fn_not_known ();
2752 *overlaps_b = conflict_fn_not_known ();
2753 *last_conflicts = chrec_dont_know;
2754 dependence_stats.num_miv_unimplemented++;
2757 if (dump_file && (dump_flags & TDF_DETAILS))
2758 fprintf (dump_file, ")\n");
2761 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2762 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2763 OVERLAP_ITERATIONS_B are initialized with two functions that
2764 describe the iterations that contain conflicting elements.
2766 Remark: For an integer k >= 0, the following equality is true:
2768 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2772 analyze_overlapping_iterations (tree chrec_a,
2774 conflict_function **overlap_iterations_a,
2775 conflict_function **overlap_iterations_b,
2776 tree *last_conflicts, struct loop *loop_nest)
2778 unsigned int lnn = loop_nest->num;
2780 dependence_stats.num_subscript_tests++;
2782 if (dump_file && (dump_flags & TDF_DETAILS))
2784 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2785 fprintf (dump_file, " (chrec_a = ");
2786 print_generic_expr (dump_file, chrec_a, 0);
2787 fprintf (dump_file, ")\n (chrec_b = ");
2788 print_generic_expr (dump_file, chrec_b, 0);
2789 fprintf (dump_file, ")\n");
2792 if (chrec_a == NULL_TREE
2793 || chrec_b == NULL_TREE
2794 || chrec_contains_undetermined (chrec_a)
2795 || chrec_contains_undetermined (chrec_b))
2797 dependence_stats.num_subscript_undetermined++;
2799 *overlap_iterations_a = conflict_fn_not_known ();
2800 *overlap_iterations_b = conflict_fn_not_known ();
2803 /* If they are the same chrec, and are affine, they overlap
2804 on every iteration. */
2805 else if (eq_evolutions_p (chrec_a, chrec_b)
2806 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2807 || operand_equal_p (chrec_a, chrec_b, 0)))
2809 dependence_stats.num_same_subscript_function++;
2810 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2811 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2812 *last_conflicts = chrec_dont_know;
2815 /* If they aren't the same, and aren't affine, we can't do anything
2817 else if ((chrec_contains_symbols (chrec_a)
2818 || chrec_contains_symbols (chrec_b))
2819 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2820 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2822 dependence_stats.num_subscript_undetermined++;
2823 *overlap_iterations_a = conflict_fn_not_known ();
2824 *overlap_iterations_b = conflict_fn_not_known ();
2827 else if (ziv_subscript_p (chrec_a, chrec_b))
2828 analyze_ziv_subscript (chrec_a, chrec_b,
2829 overlap_iterations_a, overlap_iterations_b,
2832 else if (siv_subscript_p (chrec_a, chrec_b))
2833 analyze_siv_subscript (chrec_a, chrec_b,
2834 overlap_iterations_a, overlap_iterations_b,
2835 last_conflicts, lnn);
2838 analyze_miv_subscript (chrec_a, chrec_b,
2839 overlap_iterations_a, overlap_iterations_b,
2840 last_conflicts, loop_nest);
2842 if (dump_file && (dump_flags & TDF_DETAILS))
2844 fprintf (dump_file, " (overlap_iterations_a = ");
2845 dump_conflict_function (dump_file, *overlap_iterations_a);
2846 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2847 dump_conflict_function (dump_file, *overlap_iterations_b);
2848 fprintf (dump_file, ")\n");
2849 fprintf (dump_file, ")\n");
2853 /* Helper function for uniquely inserting distance vectors. */
2856 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2861 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2862 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2865 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2868 /* Helper function for uniquely inserting direction vectors. */
2871 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2876 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2877 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2880 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2883 /* Add a distance of 1 on all the loops outer than INDEX. If we
2884 haven't yet determined a distance for this outer loop, push a new
2885 distance vector composed of the previous distance, and a distance
2886 of 1 for this outer loop. Example:
2894 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2895 save (0, 1), then we have to save (1, 0). */
2898 add_outer_distances (struct data_dependence_relation *ddr,
2899 lambda_vector dist_v, int index)
2901 /* For each outer loop where init_v is not set, the accesses are
2902 in dependence of distance 1 in the loop. */
2903 while (--index >= 0)
2905 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2906 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2908 save_dist_v (ddr, save_v);
2912 /* Return false when fail to represent the data dependence as a
2913 distance vector. INIT_B is set to true when a component has been
2914 added to the distance vector DIST_V. INDEX_CARRY is then set to
2915 the index in DIST_V that carries the dependence. */
2918 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2919 struct data_reference *ddr_a,
2920 struct data_reference *ddr_b,
2921 lambda_vector dist_v, bool *init_b,
2925 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2927 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2929 tree access_fn_a, access_fn_b;
2930 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2932 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2934 non_affine_dependence_relation (ddr);
2938 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2939 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2941 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2942 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2945 int var_a = CHREC_VARIABLE (access_fn_a);
2946 int var_b = CHREC_VARIABLE (access_fn_b);
2949 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2951 non_affine_dependence_relation (ddr);
2955 dist = int_cst_value (SUB_DISTANCE (subscript));
2956 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
2957 *index_carry = MIN (index, *index_carry);
2959 /* This is the subscript coupling test. If we have already
2960 recorded a distance for this loop (a distance coming from
2961 another subscript), it should be the same. For example,
2962 in the following code, there is no dependence:
2969 if (init_v[index] != 0 && dist_v[index] != dist)
2971 finalize_ddr_dependent (ddr, chrec_known);
2975 dist_v[index] = dist;
2979 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2981 /* This can be for example an affine vs. constant dependence
2982 (T[i] vs. T[3]) that is not an affine dependence and is
2983 not representable as a distance vector. */
2984 non_affine_dependence_relation (ddr);
2992 /* Return true when the DDR contains only constant access functions. */
2995 constant_access_functions (const struct data_dependence_relation *ddr)
2999 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3000 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3001 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3007 /* Helper function for the case where DDR_A and DDR_B are the same
3008 multivariate access function with a constant step. For an example
3012 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3015 tree c_1 = CHREC_LEFT (c_2);
3016 tree c_0 = CHREC_LEFT (c_1);
3017 lambda_vector dist_v;
3020 /* Polynomials with more than 2 variables are not handled yet. When
3021 the evolution steps are parameters, it is not possible to
3022 represent the dependence using classical distance vectors. */
3023 if (TREE_CODE (c_0) != INTEGER_CST
3024 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3025 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3027 DDR_AFFINE_P (ddr) = false;
3031 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3032 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3034 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3035 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3036 v1 = int_cst_value (CHREC_RIGHT (c_1));
3037 v2 = int_cst_value (CHREC_RIGHT (c_2));
3050 save_dist_v (ddr, dist_v);
3052 add_outer_distances (ddr, dist_v, x_1);
3055 /* Helper function for the case where DDR_A and DDR_B are the same
3056 access functions. */
3059 add_other_self_distances (struct data_dependence_relation *ddr)
3061 lambda_vector dist_v;
3063 int index_carry = DDR_NB_LOOPS (ddr);
3065 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3067 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3069 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3071 if (!evolution_function_is_univariate_p (access_fun))
3073 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3075 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3079 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3081 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3082 add_multivariate_self_dist (ddr, access_fun);
3084 /* The evolution step is not constant: it varies in
3085 the outer loop, so this cannot be represented by a
3086 distance vector. For example in pr34635.c the
3087 evolution is {0, +, {0, +, 4}_1}_2. */
3088 DDR_AFFINE_P (ddr) = false;
3093 index_carry = MIN (index_carry,
3094 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3095 DDR_LOOP_NEST (ddr)));
3099 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3100 add_outer_distances (ddr, dist_v, index_carry);
3104 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3106 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3108 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3109 save_dist_v (ddr, dist_v);
3112 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3113 is the case for example when access functions are the same and
3114 equal to a constant, as in:
3121 in which case the distance vectors are (0) and (1). */
3124 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3128 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3130 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3131 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3132 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3134 for (j = 0; j < ca->n; j++)
3135 if (affine_function_zero_p (ca->fns[j]))
3137 insert_innermost_unit_dist_vector (ddr);
3141 for (j = 0; j < cb->n; j++)
3142 if (affine_function_zero_p (cb->fns[j]))
3144 insert_innermost_unit_dist_vector (ddr);
3150 /* Compute the classic per loop distance vector. DDR is the data
3151 dependence relation to build a vector from. Return false when fail
3152 to represent the data dependence as a distance vector. */
3155 build_classic_dist_vector (struct data_dependence_relation *ddr,
3156 struct loop *loop_nest)
3158 bool init_b = false;
3159 int index_carry = DDR_NB_LOOPS (ddr);
3160 lambda_vector dist_v;
3162 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3165 if (same_access_functions (ddr))
3167 /* Save the 0 vector. */
3168 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3169 save_dist_v (ddr, dist_v);
3171 if (constant_access_functions (ddr))
3172 add_distance_for_zero_overlaps (ddr);
3174 if (DDR_NB_LOOPS (ddr) > 1)
3175 add_other_self_distances (ddr);
3180 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3181 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3182 dist_v, &init_b, &index_carry))
3185 /* Save the distance vector if we initialized one. */
3188 /* Verify a basic constraint: classic distance vectors should
3189 always be lexicographically positive.
3191 Data references are collected in the order of execution of
3192 the program, thus for the following loop
3194 | for (i = 1; i < 100; i++)
3195 | for (j = 1; j < 100; j++)
3197 | t = T[j+1][i-1]; // A
3198 | T[j][i] = t + 2; // B
3201 references are collected following the direction of the wind:
3202 A then B. The data dependence tests are performed also
3203 following this order, such that we're looking at the distance
3204 separating the elements accessed by A from the elements later
3205 accessed by B. But in this example, the distance returned by
3206 test_dep (A, B) is lexicographically negative (-1, 1), that
3207 means that the access A occurs later than B with respect to
3208 the outer loop, ie. we're actually looking upwind. In this
3209 case we solve test_dep (B, A) looking downwind to the
3210 lexicographically positive solution, that returns the
3211 distance vector (1, -1). */
3212 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3214 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3215 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3218 compute_subscript_distance (ddr);
3219 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3220 save_v, &init_b, &index_carry))
3222 save_dist_v (ddr, save_v);
3223 DDR_REVERSED_P (ddr) = true;
3225 /* In this case there is a dependence forward for all the
3228 | for (k = 1; k < 100; k++)
3229 | for (i = 1; i < 100; i++)
3230 | for (j = 1; j < 100; j++)
3232 | t = T[j+1][i-1]; // A
3233 | T[j][i] = t + 2; // B
3241 if (DDR_NB_LOOPS (ddr) > 1)
3243 add_outer_distances (ddr, save_v, index_carry);
3244 add_outer_distances (ddr, dist_v, index_carry);
3249 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3250 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3252 if (DDR_NB_LOOPS (ddr) > 1)
3254 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3256 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3257 DDR_A (ddr), loop_nest))
3259 compute_subscript_distance (ddr);
3260 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3261 opposite_v, &init_b,
3265 save_dist_v (ddr, save_v);
3266 add_outer_distances (ddr, dist_v, index_carry);
3267 add_outer_distances (ddr, opposite_v, index_carry);
3270 save_dist_v (ddr, save_v);
3275 /* There is a distance of 1 on all the outer loops: Example:
3276 there is a dependence of distance 1 on loop_1 for the array A.
3282 add_outer_distances (ddr, dist_v,
3283 lambda_vector_first_nz (dist_v,
3284 DDR_NB_LOOPS (ddr), 0));
3287 if (dump_file && (dump_flags & TDF_DETAILS))
3291 fprintf (dump_file, "(build_classic_dist_vector\n");
3292 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3294 fprintf (dump_file, " dist_vector = (");
3295 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3296 DDR_NB_LOOPS (ddr));
3297 fprintf (dump_file, " )\n");
3299 fprintf (dump_file, ")\n");
3305 /* Return the direction for a given distance.
3306 FIXME: Computing dir this way is suboptimal, since dir can catch
3307 cases that dist is unable to represent. */
3309 static inline enum data_dependence_direction
3310 dir_from_dist (int dist)
3313 return dir_positive;
3315 return dir_negative;
3320 /* Compute the classic per loop direction vector. DDR is the data
3321 dependence relation to build a vector from. */
3324 build_classic_dir_vector (struct data_dependence_relation *ddr)
3327 lambda_vector dist_v;
3329 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3331 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3333 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3334 dir_v[j] = dir_from_dist (dist_v[j]);
3336 save_dir_v (ddr, dir_v);
3340 /* Helper function. Returns true when there is a dependence between
3341 data references DRA and DRB. */
3344 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3345 struct data_reference *dra,
3346 struct data_reference *drb,
3347 struct loop *loop_nest)
3350 tree last_conflicts;
3351 struct subscript *subscript;
3353 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3356 conflict_function *overlaps_a, *overlaps_b;
3358 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3359 DR_ACCESS_FN (drb, i),
3360 &overlaps_a, &overlaps_b,
3361 &last_conflicts, loop_nest);
3363 if (CF_NOT_KNOWN_P (overlaps_a)
3364 || CF_NOT_KNOWN_P (overlaps_b))
3366 finalize_ddr_dependent (ddr, chrec_dont_know);
3367 dependence_stats.num_dependence_undetermined++;
3368 free_conflict_function (overlaps_a);
3369 free_conflict_function (overlaps_b);
3373 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3374 || CF_NO_DEPENDENCE_P (overlaps_b))
3376 finalize_ddr_dependent (ddr, chrec_known);
3377 dependence_stats.num_dependence_independent++;
3378 free_conflict_function (overlaps_a);
3379 free_conflict_function (overlaps_b);
3385 if (SUB_CONFLICTS_IN_A (subscript))
3386 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3387 if (SUB_CONFLICTS_IN_B (subscript))
3388 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3390 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3391 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3392 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3399 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3402 subscript_dependence_tester (struct data_dependence_relation *ddr,
3403 struct loop *loop_nest)
3406 if (dump_file && (dump_flags & TDF_DETAILS))
3407 fprintf (dump_file, "(subscript_dependence_tester \n");
3409 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3410 dependence_stats.num_dependence_dependent++;
3412 compute_subscript_distance (ddr);
3413 if (build_classic_dist_vector (ddr, loop_nest))
3414 build_classic_dir_vector (ddr);
3416 if (dump_file && (dump_flags & TDF_DETAILS))
3417 fprintf (dump_file, ")\n");
3420 /* Returns true when all the access functions of A are affine or
3421 constant with respect to LOOP_NEST. */
3424 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3425 const struct loop *loop_nest)
3428 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3431 FOR_EACH_VEC_ELT (tree, fns, i, t)
3432 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3433 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3439 /* Initializes an equation for an OMEGA problem using the information
3440 contained in the ACCESS_FUN. Returns true when the operation
3443 PB is the omega constraint system.
3444 EQ is the number of the equation to be initialized.
3445 OFFSET is used for shifting the variables names in the constraints:
3446 a constrain is composed of 2 * the number of variables surrounding
3447 dependence accesses. OFFSET is set either to 0 for the first n variables,
3448 then it is set to n.
3449 ACCESS_FUN is expected to be an affine chrec. */
3452 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3453 unsigned int offset, tree access_fun,
3454 struct data_dependence_relation *ddr)
3456 switch (TREE_CODE (access_fun))
3458 case POLYNOMIAL_CHREC:
3460 tree left = CHREC_LEFT (access_fun);
3461 tree right = CHREC_RIGHT (access_fun);
3462 int var = CHREC_VARIABLE (access_fun);
3465 if (TREE_CODE (right) != INTEGER_CST)
3468 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3469 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3471 /* Compute the innermost loop index. */
3472 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3475 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3476 += int_cst_value (right);
3478 switch (TREE_CODE (left))
3480 case POLYNOMIAL_CHREC:
3481 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3484 pb->eqs[eq].coef[0] += int_cst_value (left);
3493 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3501 /* As explained in the comments preceding init_omega_for_ddr, we have
3502 to set up a system for each loop level, setting outer loops
3503 variation to zero, and current loop variation to positive or zero.
3504 Save each lexico positive distance vector. */
3507 omega_extract_distance_vectors (omega_pb pb,
3508 struct data_dependence_relation *ddr)
3512 struct loop *loopi, *loopj;
3513 enum omega_result res;
3515 /* Set a new problem for each loop in the nest. The basis is the
3516 problem that we have initialized until now. On top of this we
3517 add new constraints. */
3518 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3519 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3522 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3523 DDR_NB_LOOPS (ddr));
3525 omega_copy_problem (copy, pb);
3527 /* For all the outer loops "loop_j", add "dj = 0". */
3529 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3531 eq = omega_add_zero_eq (copy, omega_black);
3532 copy->eqs[eq].coef[j + 1] = 1;
3535 /* For "loop_i", add "0 <= di". */
3536 geq = omega_add_zero_geq (copy, omega_black);
3537 copy->geqs[geq].coef[i + 1] = 1;
3539 /* Reduce the constraint system, and test that the current
3540 problem is feasible. */
3541 res = omega_simplify_problem (copy);
3542 if (res == omega_false
3543 || res == omega_unknown
3544 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3547 for (eq = 0; eq < copy->num_subs; eq++)
3548 if (copy->subs[eq].key == (int) i + 1)
3550 dist = copy->subs[eq].coef[0];
3556 /* Reinitialize problem... */
3557 omega_copy_problem (copy, pb);
3559 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3561 eq = omega_add_zero_eq (copy, omega_black);
3562 copy->eqs[eq].coef[j + 1] = 1;
3565 /* ..., but this time "di = 1". */
3566 eq = omega_add_zero_eq (copy, omega_black);
3567 copy->eqs[eq].coef[i + 1] = 1;
3568 copy->eqs[eq].coef[0] = -1;
3570 res = omega_simplify_problem (copy);
3571 if (res == omega_false
3572 || res == omega_unknown
3573 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3576 for (eq = 0; eq < copy->num_subs; eq++)
3577 if (copy->subs[eq].key == (int) i + 1)
3579 dist = copy->subs[eq].coef[0];
3585 /* Save the lexicographically positive distance vector. */
3588 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3589 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3593 for (eq = 0; eq < copy->num_subs; eq++)
3594 if (copy->subs[eq].key > 0)
3596 dist = copy->subs[eq].coef[0];
3597 dist_v[copy->subs[eq].key - 1] = dist;
3600 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3601 dir_v[j] = dir_from_dist (dist_v[j]);
3603 save_dist_v (ddr, dist_v);
3604 save_dir_v (ddr, dir_v);
3608 omega_free_problem (copy);
3612 /* This is called for each subscript of a tuple of data references:
3613 insert an equality for representing the conflicts. */
3616 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3617 struct data_dependence_relation *ddr,
3618 omega_pb pb, bool *maybe_dependent)
3621 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3622 TREE_TYPE (access_fun_b));
3623 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3624 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3625 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3628 /* When the fun_a - fun_b is not constant, the dependence is not
3629 captured by the classic distance vector representation. */
3630 if (TREE_CODE (difference) != INTEGER_CST)
3634 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3636 /* There is no dependence. */
3637 *maybe_dependent = false;
3641 minus_one = build_int_cst (type, -1);
3642 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3644 eq = omega_add_zero_eq (pb, omega_black);
3645 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3646 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3647 /* There is probably a dependence, but the system of
3648 constraints cannot be built: answer "don't know". */
3652 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3653 && !int_divides_p (lambda_vector_gcd
3654 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3655 2 * DDR_NB_LOOPS (ddr)),
3656 pb->eqs[eq].coef[0]))
3658 /* There is no dependence. */
3659 *maybe_dependent = false;
3666 /* Helper function, same as init_omega_for_ddr but specialized for
3667 data references A and B. */
3670 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3671 struct data_dependence_relation *ddr,
3672 omega_pb pb, bool *maybe_dependent)
3677 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3679 /* Insert an equality per subscript. */
3680 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3682 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3683 ddr, pb, maybe_dependent))
3685 else if (*maybe_dependent == false)
3687 /* There is no dependence. */
3688 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3693 /* Insert inequalities: constraints corresponding to the iteration
3694 domain, i.e. the loops surrounding the references "loop_x" and
3695 the distance variables "dx". The layout of the OMEGA
3696 representation is as follows:
3697 - coef[0] is the constant
3698 - coef[1..nb_loops] are the protected variables that will not be
3699 removed by the solver: the "dx"
3700 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3702 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3703 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3705 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3708 ineq = omega_add_zero_geq (pb, omega_black);
3709 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3711 /* 0 <= loop_x + dx */
3712 ineq = omega_add_zero_geq (pb, omega_black);
3713 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3714 pb->geqs[ineq].coef[i + 1] = 1;
3718 /* loop_x <= nb_iters */
3719 ineq = omega_add_zero_geq (pb, omega_black);
3720 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3721 pb->geqs[ineq].coef[0] = nbi;
3723 /* loop_x + dx <= nb_iters */
3724 ineq = omega_add_zero_geq (pb, omega_black);
3725 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3726 pb->geqs[ineq].coef[i + 1] = -1;
3727 pb->geqs[ineq].coef[0] = nbi;
3729 /* A step "dx" bigger than nb_iters is not feasible, so
3730 add "0 <= nb_iters + dx", */
3731 ineq = omega_add_zero_geq (pb, omega_black);
3732 pb->geqs[ineq].coef[i + 1] = 1;
3733 pb->geqs[ineq].coef[0] = nbi;
3734 /* and "dx <= nb_iters". */
3735 ineq = omega_add_zero_geq (pb, omega_black);
3736 pb->geqs[ineq].coef[i + 1] = -1;
3737 pb->geqs[ineq].coef[0] = nbi;
3741 omega_extract_distance_vectors (pb, ddr);
3746 /* Sets up the Omega dependence problem for the data dependence
3747 relation DDR. Returns false when the constraint system cannot be
3748 built, ie. when the test answers "don't know". Returns true
3749 otherwise, and when independence has been proved (using one of the
3750 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3751 set MAYBE_DEPENDENT to true.
3753 Example: for setting up the dependence system corresponding to the
3754 conflicting accesses
3759 | ... A[2*j, 2*(i + j)]
3763 the following constraints come from the iteration domain:
3770 where di, dj are the distance variables. The constraints
3771 representing the conflicting elements are:
3774 i + 1 = 2 * (i + di + j + dj)
3776 For asking that the resulting distance vector (di, dj) be
3777 lexicographically positive, we insert the constraint "di >= 0". If
3778 "di = 0" in the solution, we fix that component to zero, and we
3779 look at the inner loops: we set a new problem where all the outer
3780 loop distances are zero, and fix this inner component to be
3781 positive. When one of the components is positive, we save that
3782 distance, and set a new problem where the distance on this loop is
3783 zero, searching for other distances in the inner loops. Here is
3784 the classic example that illustrates that we have to set for each
3785 inner loop a new problem:
3793 we have to save two distances (1, 0) and (0, 1).
3795 Given two array references, refA and refB, we have to set the
3796 dependence problem twice, refA vs. refB and refB vs. refA, and we
3797 cannot do a single test, as refB might occur before refA in the
3798 inner loops, and the contrary when considering outer loops: ex.
3803 | T[{1,+,1}_2][{1,+,1}_1] // refA
3804 | T[{2,+,1}_2][{0,+,1}_1] // refB
3809 refB touches the elements in T before refA, and thus for the same
3810 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3811 but for successive loop_0 iterations, we have (1, -1, 1)
3813 The Omega solver expects the distance variables ("di" in the
3814 previous example) to come first in the constraint system (as
3815 variables to be protected, or "safe" variables), the constraint
3816 system is built using the following layout:
3818 "cst | distance vars | index vars".
3822 init_omega_for_ddr (struct data_dependence_relation *ddr,
3823 bool *maybe_dependent)
3828 *maybe_dependent = true;
3830 if (same_access_functions (ddr))
3833 lambda_vector dir_v;
3835 /* Save the 0 vector. */
3836 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3837 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3838 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3839 dir_v[j] = dir_equal;
3840 save_dir_v (ddr, dir_v);
3842 /* Save the dependences carried by outer loops. */
3843 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3844 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3846 omega_free_problem (pb);
3850 /* Omega expects the protected variables (those that have to be kept
3851 after elimination) to appear first in the constraint system.
3852 These variables are the distance variables. In the following
3853 initialization we declare NB_LOOPS safe variables, and the total
3854 number of variables for the constraint system is 2*NB_LOOPS. */
3855 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3856 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3858 omega_free_problem (pb);
3860 /* Stop computation if not decidable, or no dependence. */
3861 if (res == false || *maybe_dependent == false)
3864 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3865 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3867 omega_free_problem (pb);
3872 /* Return true when DDR contains the same information as that stored
3873 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3876 ddr_consistent_p (FILE *file,
3877 struct data_dependence_relation *ddr,
3878 VEC (lambda_vector, heap) *dist_vects,
3879 VEC (lambda_vector, heap) *dir_vects)
3883 /* If dump_file is set, output there. */
3884 if (dump_file && (dump_flags & TDF_DETAILS))
3887 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3889 lambda_vector b_dist_v;
3890 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3891 VEC_length (lambda_vector, dist_vects),
3892 DDR_NUM_DIST_VECTS (ddr));
3894 fprintf (file, "Banerjee dist vectors:\n");
3895 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3896 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3898 fprintf (file, "Omega dist vectors:\n");
3899 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3900 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3902 fprintf (file, "data dependence relation:\n");
3903 dump_data_dependence_relation (file, ddr);
3905 fprintf (file, ")\n");
3909 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3911 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3912 VEC_length (lambda_vector, dir_vects),
3913 DDR_NUM_DIR_VECTS (ddr));
3917 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3919 lambda_vector a_dist_v;
3920 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3922 /* Distance vectors are not ordered in the same way in the DDR
3923 and in the DIST_VECTS: search for a matching vector. */
3924 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3925 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3928 if (j == VEC_length (lambda_vector, dist_vects))
3930 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3931 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3932 fprintf (file, "not found in Omega dist vectors:\n");
3933 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3934 fprintf (file, "data dependence relation:\n");
3935 dump_data_dependence_relation (file, ddr);
3936 fprintf (file, ")\n");
3940 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3942 lambda_vector a_dir_v;
3943 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3945 /* Direction vectors are not ordered in the same way in the DDR
3946 and in the DIR_VECTS: search for a matching vector. */
3947 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3948 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3951 if (j == VEC_length (lambda_vector, dist_vects))
3953 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3954 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3955 fprintf (file, "not found in Omega dir vectors:\n");
3956 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3957 fprintf (file, "data dependence relation:\n");
3958 dump_data_dependence_relation (file, ddr);
3959 fprintf (file, ")\n");
3966 /* This computes the affine dependence relation between A and B with
3967 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3968 independence between two accesses, while CHREC_DONT_KNOW is used
3969 for representing the unknown relation.
3971 Note that it is possible to stop the computation of the dependence
3972 relation the first time we detect a CHREC_KNOWN element for a given
3976 compute_affine_dependence (struct data_dependence_relation *ddr,
3977 struct loop *loop_nest)
3979 struct data_reference *dra = DDR_A (ddr);
3980 struct data_reference *drb = DDR_B (ddr);
3982 if (dump_file && (dump_flags & TDF_DETAILS))
3984 fprintf (dump_file, "(compute_affine_dependence\n");
3985 fprintf (dump_file, " (stmt_a = \n");
3986 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3987 fprintf (dump_file, ")\n (stmt_b = \n");
3988 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3989 fprintf (dump_file, ")\n");
3992 /* Analyze only when the dependence relation is not yet known. */
3993 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3994 && !DDR_SELF_REFERENCE (ddr))
3996 dependence_stats.num_dependence_tests++;
3998 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3999 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4001 if (flag_check_data_deps)
4003 /* Compute the dependences using the first algorithm. */
4004 subscript_dependence_tester (ddr, loop_nest);
4006 if (dump_file && (dump_flags & TDF_DETAILS))
4008 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4009 dump_data_dependence_relation (dump_file, ddr);
4012 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4014 bool maybe_dependent;
4015 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4017 /* Save the result of the first DD analyzer. */
4018 dist_vects = DDR_DIST_VECTS (ddr);
4019 dir_vects = DDR_DIR_VECTS (ddr);
4021 /* Reset the information. */
4022 DDR_DIST_VECTS (ddr) = NULL;
4023 DDR_DIR_VECTS (ddr) = NULL;
4025 /* Compute the same information using Omega. */
4026 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4027 goto csys_dont_know;
4029 if (dump_file && (dump_flags & TDF_DETAILS))
4031 fprintf (dump_file, "Omega Analyzer\n");
4032 dump_data_dependence_relation (dump_file, ddr);
4035 /* Check that we get the same information. */
4036 if (maybe_dependent)
4037 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4042 subscript_dependence_tester (ddr, loop_nest);
4045 /* As a last case, if the dependence cannot be determined, or if
4046 the dependence is considered too difficult to determine, answer
4051 dependence_stats.num_dependence_undetermined++;
4053 if (dump_file && (dump_flags & TDF_DETAILS))
4055 fprintf (dump_file, "Data ref a:\n");
4056 dump_data_reference (dump_file, dra);
4057 fprintf (dump_file, "Data ref b:\n");
4058 dump_data_reference (dump_file, drb);
4059 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4061 finalize_ddr_dependent (ddr, chrec_dont_know);
4065 if (dump_file && (dump_flags & TDF_DETAILS))
4066 fprintf (dump_file, ")\n");
4069 /* This computes the dependence relation for the same data
4070 reference into DDR. */
4073 compute_self_dependence (struct data_dependence_relation *ddr)
4076 struct subscript *subscript;
4078 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4081 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4084 if (SUB_CONFLICTS_IN_A (subscript))
4085 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4086 if (SUB_CONFLICTS_IN_B (subscript))
4087 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4089 /* The accessed index overlaps for each iteration. */
4090 SUB_CONFLICTS_IN_A (subscript)
4091 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4092 SUB_CONFLICTS_IN_B (subscript)
4093 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4094 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4097 /* The distance vector is the zero vector. */
4098 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4099 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4102 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4103 the data references in DATAREFS, in the LOOP_NEST. When
4104 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4108 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4109 VEC (ddr_p, heap) **dependence_relations,
4110 VEC (loop_p, heap) *loop_nest,
4111 bool compute_self_and_rr)
4113 struct data_dependence_relation *ddr;
4114 struct data_reference *a, *b;
4117 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4118 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4119 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4121 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4122 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4124 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4127 if (compute_self_and_rr)
4128 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4130 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4131 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4132 compute_self_dependence (ddr);
4136 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4137 true if STMT clobbers memory, false otherwise. */
4140 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4142 bool clobbers_memory = false;
4145 enum gimple_code stmt_code = gimple_code (stmt);
4149 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4150 Calls have side-effects, except those to const or pure
4152 if ((stmt_code == GIMPLE_CALL
4153 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4154 || (stmt_code == GIMPLE_ASM
4155 && gimple_asm_volatile_p (stmt)))
4156 clobbers_memory = true;
4158 if (!gimple_vuse (stmt))
4159 return clobbers_memory;
4161 if (stmt_code == GIMPLE_ASSIGN)
4164 op0 = gimple_assign_lhs_ptr (stmt);
4165 op1 = gimple_assign_rhs1_ptr (stmt);
4168 || (REFERENCE_CLASS_P (*op1)
4169 && (base = get_base_address (*op1))
4170 && TREE_CODE (base) != SSA_NAME))
4172 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4174 ref->is_read = true;
4178 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4180 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4182 ref->is_read = false;
4185 else if (stmt_code == GIMPLE_CALL)
4187 unsigned i, n = gimple_call_num_args (stmt);
4189 for (i = 0; i < n; i++)
4191 op0 = gimple_call_arg_ptr (stmt, i);
4194 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4196 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4198 ref->is_read = true;
4203 return clobbers_memory;
4206 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4207 reference, returns false, otherwise returns true. NEST is the outermost
4208 loop of the loop nest in which the references should be analyzed. */
4211 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4212 VEC (data_reference_p, heap) **datarefs)
4215 VEC (data_ref_loc, heap) *references;
4218 data_reference_p dr;
4220 if (get_references_in_stmt (stmt, &references))
4222 VEC_free (data_ref_loc, heap, references);
4226 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4228 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4229 *ref->pos, stmt, ref->is_read);
4230 gcc_assert (dr != NULL);
4232 /* FIXME -- data dependence analysis does not work correctly for objects
4233 with invariant addresses in loop nests. Let us fail here until the
4234 problem is fixed. */
4235 if (dr_address_invariant_p (dr) && nest)
4238 if (dump_file && (dump_flags & TDF_DETAILS))
4239 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4244 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4246 VEC_free (data_ref_loc, heap, references);
4250 /* Stores the data references in STMT to DATAREFS. If there is an
4251 unanalyzable reference, returns false, otherwise returns true.
4252 NEST is the outermost loop of the loop nest in which the references
4253 should be instantiated, LOOP is the loop in which the references
4254 should be analyzed. */
4257 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4258 VEC (data_reference_p, heap) **datarefs)
4261 VEC (data_ref_loc, heap) *references;
4264 data_reference_p dr;
4266 if (get_references_in_stmt (stmt, &references))
4268 VEC_free (data_ref_loc, heap, references);
4272 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4274 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4275 gcc_assert (dr != NULL);
4276 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4279 VEC_free (data_ref_loc, heap, references);
4283 /* Search the data references in LOOP, and record the information into
4284 DATAREFS. Returns chrec_dont_know when failing to analyze a
4285 difficult case, returns NULL_TREE otherwise. */
4288 find_data_references_in_bb (struct loop *loop, basic_block bb,
4289 VEC (data_reference_p, heap) **datarefs)
4291 gimple_stmt_iterator bsi;
4293 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4295 gimple stmt = gsi_stmt (bsi);
4297 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4299 struct data_reference *res;
4300 res = XCNEW (struct data_reference);
4301 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4303 return chrec_dont_know;
4310 /* Search the data references in LOOP, and record the information into
4311 DATAREFS. Returns chrec_dont_know when failing to analyze a
4312 difficult case, returns NULL_TREE otherwise.
4314 TODO: This function should be made smarter so that it can handle address
4315 arithmetic as if they were array accesses, etc. */
4318 find_data_references_in_loop (struct loop *loop,
4319 VEC (data_reference_p, heap) **datarefs)
4321 basic_block bb, *bbs;
4324 bbs = get_loop_body_in_dom_order (loop);
4326 for (i = 0; i < loop->num_nodes; i++)
4330 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4333 return chrec_dont_know;
4341 /* Recursive helper function. */
4344 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4346 /* Inner loops of the nest should not contain siblings. Example:
4347 when there are two consecutive loops,
4358 the dependence relation cannot be captured by the distance
4363 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4365 return find_loop_nest_1 (loop->inner, loop_nest);
4369 /* Return false when the LOOP is not well nested. Otherwise return
4370 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4371 contain the loops from the outermost to the innermost, as they will
4372 appear in the classic distance vector. */
4375 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4377 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4379 return find_loop_nest_1 (loop->inner, loop_nest);
4383 /* Returns true when the data dependences have been computed, false otherwise.
4384 Given a loop nest LOOP, the following vectors are returned:
4385 DATAREFS is initialized to all the array elements contained in this loop,
4386 DEPENDENCE_RELATIONS contains the relations between the data references.
4387 Compute read-read and self relations if
4388 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4391 compute_data_dependences_for_loop (struct loop *loop,
4392 bool compute_self_and_read_read_dependences,
4393 VEC (loop_p, heap) **loop_nest,
4394 VEC (data_reference_p, heap) **datarefs,
4395 VEC (ddr_p, heap) **dependence_relations)
4399 memset (&dependence_stats, 0, sizeof (dependence_stats));
4401 /* If the loop nest is not well formed, or one of the data references
4402 is not computable, give up without spending time to compute other
4405 || !find_loop_nest (loop, loop_nest)
4406 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4408 struct data_dependence_relation *ddr;
4410 /* Insert a single relation into dependence_relations:
4412 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4413 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4417 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4418 compute_self_and_read_read_dependences);
4420 if (dump_file && (dump_flags & TDF_STATS))
4422 fprintf (dump_file, "Dependence tester statistics:\n");
4424 fprintf (dump_file, "Number of dependence tests: %d\n",
4425 dependence_stats.num_dependence_tests);
4426 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4427 dependence_stats.num_dependence_dependent);
4428 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4429 dependence_stats.num_dependence_independent);
4430 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4431 dependence_stats.num_dependence_undetermined);
4433 fprintf (dump_file, "Number of subscript tests: %d\n",
4434 dependence_stats.num_subscript_tests);
4435 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4436 dependence_stats.num_subscript_undetermined);
4437 fprintf (dump_file, "Number of same subscript function: %d\n",
4438 dependence_stats.num_same_subscript_function);
4440 fprintf (dump_file, "Number of ziv tests: %d\n",
4441 dependence_stats.num_ziv);
4442 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4443 dependence_stats.num_ziv_dependent);
4444 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4445 dependence_stats.num_ziv_independent);
4446 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4447 dependence_stats.num_ziv_unimplemented);
4449 fprintf (dump_file, "Number of siv tests: %d\n",
4450 dependence_stats.num_siv);
4451 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4452 dependence_stats.num_siv_dependent);
4453 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4454 dependence_stats.num_siv_independent);
4455 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4456 dependence_stats.num_siv_unimplemented);
4458 fprintf (dump_file, "Number of miv tests: %d\n",
4459 dependence_stats.num_miv);
4460 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4461 dependence_stats.num_miv_dependent);
4462 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4463 dependence_stats.num_miv_independent);
4464 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4465 dependence_stats.num_miv_unimplemented);
4471 /* Returns true when the data dependences for the basic block BB have been
4472 computed, false otherwise.
4473 DATAREFS is initialized to all the array elements contained in this basic
4474 block, DEPENDENCE_RELATIONS contains the relations between the data
4475 references. Compute read-read and self relations if
4476 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4478 compute_data_dependences_for_bb (basic_block bb,
4479 bool compute_self_and_read_read_dependences,
4480 VEC (data_reference_p, heap) **datarefs,
4481 VEC (ddr_p, heap) **dependence_relations)
4483 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4486 compute_all_dependences (*datarefs, dependence_relations, NULL,
4487 compute_self_and_read_read_dependences);
4491 /* Entry point (for testing only). Analyze all the data references
4492 and the dependence relations in LOOP.
4494 The data references are computed first.
4496 A relation on these nodes is represented by a complete graph. Some
4497 of the relations could be of no interest, thus the relations can be
4500 In the following function we compute all the relations. This is
4501 just a first implementation that is here for:
4502 - for showing how to ask for the dependence relations,
4503 - for the debugging the whole dependence graph,
4504 - for the dejagnu testcases and maintenance.
4506 It is possible to ask only for a part of the graph, avoiding to
4507 compute the whole dependence graph. The computed dependences are
4508 stored in a knowledge base (KB) such that later queries don't
4509 recompute the same information. The implementation of this KB is
4510 transparent to the optimizer, and thus the KB can be changed with a
4511 more efficient implementation, or the KB could be disabled. */
4513 analyze_all_data_dependences (struct loop *loop)
4516 int nb_data_refs = 10;
4517 VEC (data_reference_p, heap) *datarefs =
4518 VEC_alloc (data_reference_p, heap, nb_data_refs);
4519 VEC (ddr_p, heap) *dependence_relations =
4520 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4521 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4523 /* Compute DDs on the whole function. */
4524 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4525 &dependence_relations);
4529 dump_data_dependence_relations (dump_file, dependence_relations);
4530 fprintf (dump_file, "\n\n");
4532 if (dump_flags & TDF_DETAILS)
4533 dump_dist_dir_vectors (dump_file, dependence_relations);
4535 if (dump_flags & TDF_STATS)
4537 unsigned nb_top_relations = 0;
4538 unsigned nb_bot_relations = 0;
4539 unsigned nb_chrec_relations = 0;
4540 struct data_dependence_relation *ddr;
4542 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4544 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4547 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4551 nb_chrec_relations++;
4554 gather_stats_on_scev_database ();
4558 VEC_free (loop_p, heap, loop_nest);
4559 free_dependence_relations (dependence_relations);
4560 free_data_refs (datarefs);
4563 /* Computes all the data dependences and check that the results of
4564 several analyzers are the same. */
4567 tree_check_data_deps (void)
4570 struct loop *loop_nest;
4572 FOR_EACH_LOOP (li, loop_nest, 0)
4573 analyze_all_data_dependences (loop_nest);
4576 /* Free the memory used by a data dependence relation DDR. */
4579 free_dependence_relation (struct data_dependence_relation *ddr)
4584 if (DDR_SUBSCRIPTS (ddr))
4585 free_subscripts (DDR_SUBSCRIPTS (ddr));
4586 if (DDR_DIST_VECTS (ddr))
4587 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4588 if (DDR_DIR_VECTS (ddr))
4589 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4594 /* Free the memory used by the data dependence relations from
4595 DEPENDENCE_RELATIONS. */
4598 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4601 struct data_dependence_relation *ddr;
4603 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4605 free_dependence_relation (ddr);
4607 VEC_free (ddr_p, heap, dependence_relations);
4610 /* Free the memory used by the data references from DATAREFS. */
4613 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4616 struct data_reference *dr;
4618 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4620 VEC_free (data_reference_p, heap, datarefs);
4625 /* Dump vertex I in RDG to FILE. */
4628 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4630 struct vertex *v = &(rdg->vertices[i]);
4631 struct graph_edge *e;
4633 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4634 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4635 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4638 for (e = v->pred; e; e = e->pred_next)
4639 fprintf (file, " %d", e->src);
4641 fprintf (file, ") (out:");
4644 for (e = v->succ; e; e = e->succ_next)
4645 fprintf (file, " %d", e->dest);
4647 fprintf (file, ")\n");
4648 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4649 fprintf (file, ")\n");
4652 /* Call dump_rdg_vertex on stderr. */
4655 debug_rdg_vertex (struct graph *rdg, int i)
4657 dump_rdg_vertex (stderr, rdg, i);
4660 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4661 dumped vertices to that bitmap. */
4663 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4667 fprintf (file, "(%d\n", c);
4669 for (i = 0; i < rdg->n_vertices; i++)
4670 if (rdg->vertices[i].component == c)
4673 bitmap_set_bit (dumped, i);
4675 dump_rdg_vertex (file, rdg, i);
4678 fprintf (file, ")\n");
4681 /* Call dump_rdg_vertex on stderr. */
4684 debug_rdg_component (struct graph *rdg, int c)
4686 dump_rdg_component (stderr, rdg, c, NULL);
4689 /* Dump the reduced dependence graph RDG to FILE. */
4692 dump_rdg (FILE *file, struct graph *rdg)
4695 bitmap dumped = BITMAP_ALLOC (NULL);
4697 fprintf (file, "(rdg\n");
4699 for (i = 0; i < rdg->n_vertices; i++)
4700 if (!bitmap_bit_p (dumped, i))
4701 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4703 fprintf (file, ")\n");
4704 BITMAP_FREE (dumped);
4707 /* Call dump_rdg on stderr. */
4710 debug_rdg (struct graph *rdg)
4712 dump_rdg (stderr, rdg);
4716 dot_rdg_1 (FILE *file, struct graph *rdg)
4720 fprintf (file, "digraph RDG {\n");
4722 for (i = 0; i < rdg->n_vertices; i++)
4724 struct vertex *v = &(rdg->vertices[i]);
4725 struct graph_edge *e;
4727 /* Highlight reads from memory. */
4728 if (RDG_MEM_READS_STMT (rdg, i))
4729 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4731 /* Highlight stores to memory. */
4732 if (RDG_MEM_WRITE_STMT (rdg, i))
4733 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4736 for (e = v->succ; e; e = e->succ_next)
4737 switch (RDGE_TYPE (e))
4740 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4744 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4748 /* These are the most common dependences: don't print these. */
4749 fprintf (file, "%d -> %d \n", i, e->dest);
4753 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4761 fprintf (file, "}\n\n");
4764 /* Display the Reduced Dependence Graph using dotty. */
4765 extern void dot_rdg (struct graph *);
4768 dot_rdg (struct graph *rdg)
4770 /* When debugging, enable the following code. This cannot be used
4771 in production compilers because it calls "system". */
4773 FILE *file = fopen ("/tmp/rdg.dot", "w");
4774 gcc_assert (file != NULL);
4776 dot_rdg_1 (file, rdg);
4779 system ("dotty /tmp/rdg.dot &");
4781 dot_rdg_1 (stderr, rdg);
4785 /* This structure is used for recording the mapping statement index in
4788 struct GTY(()) rdg_vertex_info
4794 /* Returns the index of STMT in RDG. */
4797 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4799 struct rdg_vertex_info rvi, *slot;
4802 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4810 /* Creates an edge in RDG for each distance vector from DDR. The
4811 order that we keep track of in the RDG is the order in which
4812 statements have to be executed. */
4815 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4817 struct graph_edge *e;
4819 data_reference_p dra = DDR_A (ddr);
4820 data_reference_p drb = DDR_B (ddr);
4821 unsigned level = ddr_dependence_level (ddr);
4823 /* For non scalar dependences, when the dependence is REVERSED,
4824 statement B has to be executed before statement A. */
4826 && !DDR_REVERSED_P (ddr))
4828 data_reference_p tmp = dra;
4833 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4834 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4836 if (va < 0 || vb < 0)
4839 e = add_edge (rdg, va, vb);
4840 e->data = XNEW (struct rdg_edge);
4842 RDGE_LEVEL (e) = level;
4843 RDGE_RELATION (e) = ddr;
4845 /* Determines the type of the data dependence. */
4846 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4847 RDGE_TYPE (e) = input_dd;
4848 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4849 RDGE_TYPE (e) = output_dd;
4850 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4851 RDGE_TYPE (e) = flow_dd;
4852 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4853 RDGE_TYPE (e) = anti_dd;
4856 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4857 the index of DEF in RDG. */
4860 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4862 use_operand_p imm_use_p;
4863 imm_use_iterator iterator;
4865 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4867 struct graph_edge *e;
4868 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4873 e = add_edge (rdg, idef, use);
4874 e->data = XNEW (struct rdg_edge);
4875 RDGE_TYPE (e) = flow_dd;
4876 RDGE_RELATION (e) = NULL;
4880 /* Creates the edges of the reduced dependence graph RDG. */
4883 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4886 struct data_dependence_relation *ddr;
4887 def_operand_p def_p;
4890 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4891 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4892 create_rdg_edge_for_ddr (rdg, ddr);
4894 for (i = 0; i < rdg->n_vertices; i++)
4895 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4897 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4900 /* Build the vertices of the reduced dependence graph RDG. */
4903 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4908 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4910 VEC (data_ref_loc, heap) *references;
4912 struct vertex *v = &(rdg->vertices[i]);
4913 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4914 struct rdg_vertex_info **slot;
4918 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4925 v->data = XNEW (struct rdg_vertex);
4926 RDG_STMT (rdg, i) = stmt;
4928 RDG_MEM_WRITE_STMT (rdg, i) = false;
4929 RDG_MEM_READS_STMT (rdg, i) = false;
4930 if (gimple_code (stmt) == GIMPLE_PHI)
4933 get_references_in_stmt (stmt, &references);
4934 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4936 RDG_MEM_WRITE_STMT (rdg, i) = true;
4938 RDG_MEM_READS_STMT (rdg, i) = true;
4940 VEC_free (data_ref_loc, heap, references);
4944 /* Initialize STMTS with all the statements of LOOP. When
4945 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4946 which we discover statements is important as
4947 generate_loops_for_partition is using the same traversal for
4948 identifying statements. */
4951 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4954 basic_block *bbs = get_loop_body_in_dom_order (loop);
4956 for (i = 0; i < loop->num_nodes; i++)
4958 basic_block bb = bbs[i];
4959 gimple_stmt_iterator bsi;
4962 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4963 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4965 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4967 stmt = gsi_stmt (bsi);
4968 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
4969 VEC_safe_push (gimple, heap, *stmts, stmt);
4976 /* Returns true when all the dependences are computable. */
4979 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4984 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4985 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4991 /* Computes a hash function for element ELT. */
4994 hash_stmt_vertex_info (const void *elt)
4996 const struct rdg_vertex_info *const rvi =
4997 (const struct rdg_vertex_info *) elt;
4998 gimple stmt = rvi->stmt;
5000 return htab_hash_pointer (stmt);
5003 /* Compares database elements E1 and E2. */
5006 eq_stmt_vertex_info (const void *e1, const void *e2)
5008 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5009 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5011 return elt1->stmt == elt2->stmt;
5014 /* Free the element E. */
5017 hash_stmt_vertex_del (void *e)
5022 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5023 statement of the loop nest, and one edge per data dependence or
5024 scalar dependence. */
5027 build_empty_rdg (int n_stmts)
5029 int nb_data_refs = 10;
5030 struct graph *rdg = new_graph (n_stmts);
5032 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5033 eq_stmt_vertex_info, hash_stmt_vertex_del);
5037 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5038 statement of the loop nest, and one edge per data dependence or
5039 scalar dependence. */
5042 build_rdg (struct loop *loop,
5043 VEC (loop_p, heap) **loop_nest,
5044 VEC (ddr_p, heap) **dependence_relations,
5045 VEC (data_reference_p, heap) **datarefs)
5047 struct graph *rdg = NULL;
5048 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5050 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5051 dependence_relations);
5053 if (known_dependences_p (*dependence_relations))
5055 stmts_from_loop (loop, &stmts);
5056 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5057 create_rdg_vertices (rdg, stmts);
5058 create_rdg_edges (rdg, *dependence_relations);
5061 VEC_free (gimple, heap, stmts);
5065 /* Free the reduced dependence graph RDG. */
5068 free_rdg (struct graph *rdg)
5072 for (i = 0; i < rdg->n_vertices; i++)
5074 struct vertex *v = &(rdg->vertices[i]);
5075 struct graph_edge *e;
5077 for (e = v->succ; e; e = e->succ_next)
5083 htab_delete (rdg->indices);
5087 /* Initialize STMTS with all the statements of LOOP that contain a
5091 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5094 basic_block *bbs = get_loop_body_in_dom_order (loop);
5096 for (i = 0; i < loop->num_nodes; i++)
5098 basic_block bb = bbs[i];
5099 gimple_stmt_iterator bsi;
5101 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5102 if (gimple_vdef (gsi_stmt (bsi)))
5103 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5109 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5110 that contains a data reference on its LHS with a stride of the same
5111 size as its unit type. */
5114 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5118 struct data_reference *dr;
5121 || !gimple_vdef (stmt)
5122 || !is_gimple_assign (stmt)
5123 || !gimple_assign_single_p (stmt)
5124 || !(op1 = gimple_assign_rhs1 (stmt))
5125 || !(integer_zerop (op1) || real_zerop (op1)))
5128 dr = XCNEW (struct data_reference);
5129 op0 = gimple_assign_lhs (stmt);
5131 DR_STMT (dr) = stmt;
5134 res = dr_analyze_innermost (dr)
5135 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5141 /* Initialize STMTS with all the statements of LOOP that contain a
5142 store to memory of the form "A[i] = 0". */
5145 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5149 gimple_stmt_iterator si;
5151 basic_block *bbs = get_loop_body_in_dom_order (loop);
5153 for (i = 0; i < loop->num_nodes; i++)
5154 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5155 if ((stmt = gsi_stmt (si))
5156 && stmt_with_adjacent_zero_store_dr_p (stmt))
5157 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5162 /* For a data reference REF, return the declaration of its base
5163 address or NULL_TREE if the base is not determined. */
5166 ref_base_address (gimple stmt, data_ref_loc *ref)
5168 tree base = NULL_TREE;
5170 struct data_reference *dr = XCNEW (struct data_reference);
5172 DR_STMT (dr) = stmt;
5173 DR_REF (dr) = *ref->pos;
5174 dr_analyze_innermost (dr);
5175 base_address = DR_BASE_ADDRESS (dr);
5180 switch (TREE_CODE (base_address))
5183 base = TREE_OPERAND (base_address, 0);
5187 base = base_address;
5196 /* Determines whether the statement from vertex V of the RDG has a
5197 definition used outside the loop that contains this statement. */
5200 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5202 gimple stmt = RDG_STMT (rdg, v);
5203 struct loop *loop = loop_containing_stmt (stmt);
5204 use_operand_p imm_use_p;
5205 imm_use_iterator iterator;
5207 def_operand_p def_p;
5212 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5214 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5216 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5224 /* Determines whether statements S1 and S2 access to similar memory
5225 locations. Two memory accesses are considered similar when they
5226 have the same base address declaration, i.e. when their
5227 ref_base_address is the same. */
5230 have_similar_memory_accesses (gimple s1, gimple s2)
5234 VEC (data_ref_loc, heap) *refs1, *refs2;
5235 data_ref_loc *ref1, *ref2;
5237 get_references_in_stmt (s1, &refs1);
5238 get_references_in_stmt (s2, &refs2);
5240 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5242 tree base1 = ref_base_address (s1, ref1);
5245 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5246 if (base1 == ref_base_address (s2, ref2))
5254 VEC_free (data_ref_loc, heap, refs1);
5255 VEC_free (data_ref_loc, heap, refs2);
5259 /* Helper function for the hashtab. */
5262 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5264 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5265 CONST_CAST_GIMPLE ((const_gimple) s2));
5268 /* Helper function for the hashtab. */
5271 ref_base_address_1 (const void *s)
5273 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5275 VEC (data_ref_loc, heap) *refs;
5279 get_references_in_stmt (stmt, &refs);
5281 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5284 res = htab_hash_pointer (ref_base_address (stmt, ref));
5288 VEC_free (data_ref_loc, heap, refs);
5292 /* Try to remove duplicated write data references from STMTS. */
5295 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5299 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5300 have_similar_memory_accesses_1, NULL);
5302 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5306 slot = htab_find_slot (seen, stmt, INSERT);
5309 VEC_ordered_remove (gimple, *stmts, i);
5312 *slot = (void *) stmt;
5320 /* Returns the index of PARAMETER in the parameters vector of the
5321 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5324 access_matrix_get_index_for_parameter (tree parameter,
5325 struct access_matrix *access_matrix)
5328 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5329 tree lambda_parameter;
5331 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5332 if (lambda_parameter == parameter)
5333 return i + AM_NB_INDUCTION_VARS (access_matrix);