1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
50 - to define an interface to access this data.
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
63 has an integer solution x = 1 and y = -1.
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
88 static struct datadep_stats
90 int num_dependence_tests;
91 int num_dependence_dependent;
92 int num_dependence_independent;
93 int num_dependence_undetermined;
95 int num_subscript_tests;
96 int num_subscript_undetermined;
97 int num_same_subscript_function;
100 int num_ziv_independent;
101 int num_ziv_dependent;
102 int num_ziv_unimplemented;
105 int num_siv_independent;
106 int num_siv_dependent;
107 int num_siv_unimplemented;
110 int num_miv_independent;
111 int num_miv_dependent;
112 int num_miv_unimplemented;
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
116 struct data_reference *,
117 struct data_reference *,
119 /* Returns true iff A divides B. */
122 tree_fold_divides_p (const_tree a, const_tree b)
124 gcc_assert (TREE_CODE (a) == INTEGER_CST);
125 gcc_assert (TREE_CODE (b) == INTEGER_CST);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
129 /* Returns true iff A divides B. */
132 int_divides_p (int a, int b)
134 return ((b % a) == 0);
139 /* Dump into FILE all the data references from DATAREFS. */
142 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
145 struct data_reference *dr;
147 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
148 dump_data_reference (file, dr);
151 /* Dump into STDERR all the data references from DATAREFS. */
154 debug_data_references (VEC (data_reference_p, heap) *datarefs)
156 dump_data_references (stderr, datarefs);
159 /* Dump to STDERR all the dependence relations from DDRS. */
162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 dump_data_dependence_relations (stderr, ddrs);
167 /* Dump into FILE all the dependence relations from DDRS. */
170 dump_data_dependence_relations (FILE *file,
171 VEC (ddr_p, heap) *ddrs)
174 struct data_dependence_relation *ddr;
176 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
177 dump_data_dependence_relation (file, ddr);
180 /* Print to STDERR the data_reference DR. */
183 debug_data_reference (struct data_reference *dr)
185 dump_data_reference (stderr, dr);
188 /* Dump function for a DATA_REFERENCE structure. */
191 dump_data_reference (FILE *outf,
192 struct data_reference *dr)
196 fprintf (outf, "#(Data Ref: \n");
197 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
198 fprintf (outf, "# stmt: ");
199 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
200 fprintf (outf, "# ref: ");
201 print_generic_stmt (outf, DR_REF (dr), 0);
202 fprintf (outf, "# base_object: ");
203 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
205 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
207 fprintf (outf, "# Access function %d: ", i);
208 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
210 fprintf (outf, "#)\n");
213 /* Dumps the affine function described by FN to the file OUTF. */
216 dump_affine_function (FILE *outf, affine_fn fn)
221 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
222 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
224 fprintf (outf, " + ");
225 print_generic_expr (outf, coef, TDF_SLIM);
226 fprintf (outf, " * x_%u", i);
230 /* Dumps the conflict function CF to the file OUTF. */
233 dump_conflict_function (FILE *outf, conflict_function *cf)
237 if (cf->n == NO_DEPENDENCE)
238 fprintf (outf, "no dependence\n");
239 else if (cf->n == NOT_KNOWN)
240 fprintf (outf, "not known\n");
243 for (i = 0; i < cf->n; i++)
246 dump_affine_function (outf, cf->fns[i]);
247 fprintf (outf, "]\n");
252 /* Dump function for a SUBSCRIPT structure. */
255 dump_subscript (FILE *outf, struct subscript *subscript)
257 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
259 fprintf (outf, "\n (subscript \n");
260 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
261 dump_conflict_function (outf, cf);
262 if (CF_NONTRIVIAL_P (cf))
264 tree last_iteration = SUB_LAST_CONFLICT (subscript);
265 fprintf (outf, " last_conflict: ");
266 print_generic_stmt (outf, last_iteration, 0);
269 cf = SUB_CONFLICTS_IN_B (subscript);
270 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
271 dump_conflict_function (outf, cf);
272 if (CF_NONTRIVIAL_P (cf))
274 tree last_iteration = SUB_LAST_CONFLICT (subscript);
275 fprintf (outf, " last_conflict: ");
276 print_generic_stmt (outf, last_iteration, 0);
279 fprintf (outf, " (Subscript distance: ");
280 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
281 fprintf (outf, " )\n");
282 fprintf (outf, " )\n");
285 /* Print the classic direction vector DIRV to OUTF. */
288 print_direction_vector (FILE *outf,
294 for (eq = 0; eq < length; eq++)
296 enum data_dependence_direction dir = ((enum data_dependence_direction)
302 fprintf (outf, " +");
305 fprintf (outf, " -");
308 fprintf (outf, " =");
310 case dir_positive_or_equal:
311 fprintf (outf, " +=");
313 case dir_positive_or_negative:
314 fprintf (outf, " +-");
316 case dir_negative_or_equal:
317 fprintf (outf, " -=");
320 fprintf (outf, " *");
323 fprintf (outf, "indep");
327 fprintf (outf, "\n");
330 /* Print a vector of direction vectors. */
333 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
339 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
340 print_direction_vector (outf, v, length);
343 /* Print out a vector VEC of length N to OUTFILE. */
346 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
350 for (i = 0; i < n; i++)
351 fprintf (outfile, "%3d ", vector[i]);
352 fprintf (outfile, "\n");
355 /* Print a vector of distance vectors. */
358 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
364 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
365 print_lambda_vector (outf, v, length);
371 debug_data_dependence_relation (struct data_dependence_relation *ddr)
373 dump_data_dependence_relation (stderr, ddr);
376 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
379 dump_data_dependence_relation (FILE *outf,
380 struct data_dependence_relation *ddr)
382 struct data_reference *dra, *drb;
384 fprintf (outf, "(Data Dep: \n");
386 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
393 dump_data_reference (outf, dra);
395 fprintf (outf, " (nil)\n");
397 dump_data_reference (outf, drb);
399 fprintf (outf, " (nil)\n");
401 fprintf (outf, " (don't know)\n)\n");
407 dump_data_reference (outf, dra);
408 dump_data_reference (outf, drb);
410 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
411 fprintf (outf, " (no dependence)\n");
413 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
418 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
420 fprintf (outf, " access_fn_A: ");
421 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
422 fprintf (outf, " access_fn_B: ");
423 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
424 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
427 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
428 fprintf (outf, " loop nest: (");
429 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
430 fprintf (outf, "%d ", loopi->num);
431 fprintf (outf, ")\n");
433 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
435 fprintf (outf, " distance_vector: ");
436 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
440 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
442 fprintf (outf, " direction_vector: ");
443 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
448 fprintf (outf, ")\n");
451 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
454 dump_data_dependence_direction (FILE *file,
455 enum data_dependence_direction dir)
471 case dir_positive_or_negative:
472 fprintf (file, "+-");
475 case dir_positive_or_equal:
476 fprintf (file, "+=");
479 case dir_negative_or_equal:
480 fprintf (file, "-=");
492 /* Dumps the distance and direction vectors in FILE. DDRS contains
493 the dependence relations, and VECT_SIZE is the size of the
494 dependence vectors, or in other words the number of loops in the
498 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
501 struct data_dependence_relation *ddr;
504 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
505 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
507 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
509 fprintf (file, "DISTANCE_V (");
510 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
511 fprintf (file, ")\n");
514 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
516 fprintf (file, "DIRECTION_V (");
517 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
518 fprintf (file, ")\n");
522 fprintf (file, "\n\n");
525 /* Dumps the data dependence relations DDRS in FILE. */
528 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
531 struct data_dependence_relation *ddr;
533 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
534 dump_data_dependence_relation (file, ddr);
536 fprintf (file, "\n\n");
539 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
540 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
541 constant of type ssizetype, and returns true. If we cannot do this
542 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
546 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
547 tree *var, tree *off)
551 enum tree_code ocode = code;
559 *var = build_int_cst (type, 0);
560 *off = fold_convert (ssizetype, op0);
563 case POINTER_PLUS_EXPR:
568 split_constant_offset (op0, &var0, &off0);
569 split_constant_offset (op1, &var1, &off1);
570 *var = fold_build2 (code, type, var0, var1);
571 *off = size_binop (ocode, off0, off1);
575 if (TREE_CODE (op1) != INTEGER_CST)
578 split_constant_offset (op0, &var0, &off0);
579 *var = fold_build2 (MULT_EXPR, type, var0, op1);
580 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
586 HOST_WIDE_INT pbitsize, pbitpos;
587 enum machine_mode pmode;
588 int punsignedp, pvolatilep;
590 op0 = TREE_OPERAND (op0, 0);
591 if (!handled_component_p (op0))
594 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
595 &pmode, &punsignedp, &pvolatilep, false);
597 if (pbitpos % BITS_PER_UNIT != 0)
599 base = build_fold_addr_expr (base);
600 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
604 split_constant_offset (poffset, &poffset, &off1);
605 off0 = size_binop (PLUS_EXPR, off0, off1);
606 if (POINTER_TYPE_P (TREE_TYPE (base)))
607 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
608 base, fold_convert (sizetype, poffset));
610 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
611 fold_convert (TREE_TYPE (base), poffset));
614 var0 = fold_convert (type, base);
616 /* If variable length types are involved, punt, otherwise casts
617 might be converted into ARRAY_REFs in gimplify_conversion.
618 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
619 possibly no longer appears in current GIMPLE, might resurface.
620 This perhaps could run
621 if (CONVERT_EXPR_P (var0))
623 gimplify_conversion (&var0);
624 // Attempt to fill in any within var0 found ARRAY_REF's
625 // element size from corresponding op embedded ARRAY_REF,
626 // if unsuccessful, just punt.
628 while (POINTER_TYPE_P (type))
629 type = TREE_TYPE (type);
630 if (int_size_in_bytes (type) < 0)
640 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
641 enum tree_code subcode;
643 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
646 var0 = gimple_assign_rhs1 (def_stmt);
647 subcode = gimple_assign_rhs_code (def_stmt);
648 var1 = gimple_assign_rhs2 (def_stmt);
650 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
654 /* We must not introduce undefined overflow, and we must not change the value.
655 Hence we're okay if the inner type doesn't overflow to start with
656 (pointer or signed), the outer type also is an integer or pointer
657 and the outer precision is at least as large as the inner. */
658 tree itype = TREE_TYPE (op0);
659 if ((POINTER_TYPE_P (itype)
660 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
661 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
662 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
664 split_constant_offset (op0, &var0, off);
665 *var = fold_convert (type, var0);
676 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
677 will be ssizetype. */
680 split_constant_offset (tree exp, tree *var, tree *off)
682 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
686 *off = ssize_int (0);
689 if (automatically_generated_chrec_p (exp))
692 otype = TREE_TYPE (exp);
693 code = TREE_CODE (exp);
694 extract_ops_from_tree (exp, &code, &op0, &op1);
695 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
697 *var = fold_convert (type, e);
702 /* Returns the address ADDR of an object in a canonical shape (without nop
703 casts, and with type of pointer to the object). */
706 canonicalize_base_object_address (tree addr)
712 /* The base address may be obtained by casting from integer, in that case
714 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
717 if (TREE_CODE (addr) != ADDR_EXPR)
720 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
723 /* Analyzes the behavior of the memory reference DR in the innermost loop or
724 basic block that contains it. Returns true if analysis succeed or false
728 dr_analyze_innermost (struct data_reference *dr)
730 gimple stmt = DR_STMT (dr);
731 struct loop *loop = loop_containing_stmt (stmt);
732 tree ref = DR_REF (dr);
733 HOST_WIDE_INT pbitsize, pbitpos;
735 enum machine_mode pmode;
736 int punsignedp, pvolatilep;
737 affine_iv base_iv, offset_iv;
738 tree init, dinit, step;
739 bool in_loop = (loop && loop->num);
741 if (dump_file && (dump_flags & TDF_DETAILS))
742 fprintf (dump_file, "analyze_innermost: ");
744 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
745 &pmode, &punsignedp, &pvolatilep, false);
746 gcc_assert (base != NULL_TREE);
748 if (pbitpos % BITS_PER_UNIT != 0)
750 if (dump_file && (dump_flags & TDF_DETAILS))
751 fprintf (dump_file, "failed: bit offset alignment.\n");
755 if (TREE_CODE (base) == MEM_REF)
757 if (!integer_zerop (TREE_OPERAND (base, 1)))
761 double_int moff = mem_ref_offset (base);
762 poffset = double_int_to_tree (sizetype, moff);
765 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
767 base = TREE_OPERAND (base, 0);
770 base = build_fold_addr_expr (base);
773 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
776 if (dump_file && (dump_flags & TDF_DETAILS))
777 fprintf (dump_file, "failed: evolution of base is not affine.\n");
784 base_iv.step = ssize_int (0);
785 base_iv.no_overflow = true;
790 offset_iv.base = ssize_int (0);
791 offset_iv.step = ssize_int (0);
797 offset_iv.base = poffset;
798 offset_iv.step = ssize_int (0);
800 else if (!simple_iv (loop, loop_containing_stmt (stmt),
801 poffset, &offset_iv, false))
803 if (dump_file && (dump_flags & TDF_DETAILS))
804 fprintf (dump_file, "failed: evolution of offset is not"
810 init = ssize_int (pbitpos / BITS_PER_UNIT);
811 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
812 init = size_binop (PLUS_EXPR, init, dinit);
813 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
814 init = size_binop (PLUS_EXPR, init, dinit);
816 step = size_binop (PLUS_EXPR,
817 fold_convert (ssizetype, base_iv.step),
818 fold_convert (ssizetype, offset_iv.step));
820 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
822 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
826 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
828 if (dump_file && (dump_flags & TDF_DETAILS))
829 fprintf (dump_file, "success.\n");
834 /* Determines the base object and the list of indices of memory reference
835 DR, analyzed in LOOP and instantiated in loop nest NEST. */
838 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
840 VEC (tree, heap) *access_fns = NULL;
841 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
842 tree base, off, access_fn = NULL_TREE;
843 basic_block before_loop = NULL;
846 before_loop = block_before_loop (nest);
848 while (handled_component_p (aref))
850 if (TREE_CODE (aref) == ARRAY_REF)
852 op = TREE_OPERAND (aref, 1);
855 access_fn = analyze_scalar_evolution (loop, op);
856 access_fn = instantiate_scev (before_loop, loop, access_fn);
857 VEC_safe_push (tree, heap, access_fns, access_fn);
860 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
863 aref = TREE_OPERAND (aref, 0);
867 && (INDIRECT_REF_P (aref)
868 || TREE_CODE (aref) == MEM_REF))
870 op = TREE_OPERAND (aref, 0);
871 access_fn = analyze_scalar_evolution (loop, op);
872 access_fn = instantiate_scev (before_loop, loop, access_fn);
873 base = initial_condition (access_fn);
874 split_constant_offset (base, &base, &off);
875 if (TREE_CODE (aref) == MEM_REF)
876 off = size_binop (PLUS_EXPR, off,
877 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
878 access_fn = chrec_replace_initial_condition (access_fn,
879 fold_convert (TREE_TYPE (base), off));
881 TREE_OPERAND (aref, 0) = base;
882 VEC_safe_push (tree, heap, access_fns, access_fn);
885 if (TREE_CODE (aref) == MEM_REF)
886 TREE_OPERAND (aref, 1)
887 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
889 if (TREE_CODE (ref) == MEM_REF
890 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
891 && integer_zerop (TREE_OPERAND (ref, 1)))
892 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
894 /* For canonicalization purposes we'd like to strip all outermost
895 zero-offset component-refs.
896 ??? For now simply handle zero-index array-refs. */
897 while (TREE_CODE (ref) == ARRAY_REF
898 && integer_zerop (TREE_OPERAND (ref, 1)))
899 ref = TREE_OPERAND (ref, 0);
901 DR_BASE_OBJECT (dr) = ref;
902 DR_ACCESS_FNS (dr) = access_fns;
905 /* Extracts the alias analysis information from the memory reference DR. */
908 dr_analyze_alias (struct data_reference *dr)
910 tree ref = DR_REF (dr);
911 tree base = get_base_address (ref), addr;
913 if (INDIRECT_REF_P (base)
914 || TREE_CODE (base) == MEM_REF)
916 addr = TREE_OPERAND (base, 0);
917 if (TREE_CODE (addr) == SSA_NAME)
918 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
922 /* Returns true if the address of DR is invariant. */
925 dr_address_invariant_p (struct data_reference *dr)
930 FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
931 if (tree_contains_chrecs (idx, NULL))
937 /* Frees data reference DR. */
940 free_data_ref (data_reference_p dr)
942 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
946 /* Analyzes memory reference MEMREF accessed in STMT. The reference
947 is read if IS_READ is true, write otherwise. Returns the
948 data_reference description of MEMREF. NEST is the outermost loop
949 in which the reference should be instantiated, LOOP is the loop in
950 which the data reference should be analyzed. */
952 struct data_reference *
953 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
956 struct data_reference *dr;
958 if (dump_file && (dump_flags & TDF_DETAILS))
960 fprintf (dump_file, "Creating dr for ");
961 print_generic_expr (dump_file, memref, TDF_SLIM);
962 fprintf (dump_file, "\n");
965 dr = XCNEW (struct data_reference);
967 DR_REF (dr) = memref;
968 DR_IS_READ (dr) = is_read;
970 dr_analyze_innermost (dr);
971 dr_analyze_indices (dr, nest, loop);
972 dr_analyze_alias (dr);
974 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "\tbase_address: ");
977 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
978 fprintf (dump_file, "\n\toffset from base address: ");
979 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
980 fprintf (dump_file, "\n\tconstant offset from base address: ");
981 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
982 fprintf (dump_file, "\n\tstep: ");
983 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
984 fprintf (dump_file, "\n\taligned to: ");
985 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
986 fprintf (dump_file, "\n\tbase_object: ");
987 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
988 fprintf (dump_file, "\n");
994 /* Returns true if FNA == FNB. */
997 affine_function_equal_p (affine_fn fna, affine_fn fnb)
999 unsigned i, n = VEC_length (tree, fna);
1001 if (n != VEC_length (tree, fnb))
1004 for (i = 0; i < n; i++)
1005 if (!operand_equal_p (VEC_index (tree, fna, i),
1006 VEC_index (tree, fnb, i), 0))
1012 /* If all the functions in CF are the same, returns one of them,
1013 otherwise returns NULL. */
1016 common_affine_function (conflict_function *cf)
1021 if (!CF_NONTRIVIAL_P (cf))
1026 for (i = 1; i < cf->n; i++)
1027 if (!affine_function_equal_p (comm, cf->fns[i]))
1033 /* Returns the base of the affine function FN. */
1036 affine_function_base (affine_fn fn)
1038 return VEC_index (tree, fn, 0);
1041 /* Returns true if FN is a constant. */
1044 affine_function_constant_p (affine_fn fn)
1049 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1050 if (!integer_zerop (coef))
1056 /* Returns true if FN is the zero constant function. */
1059 affine_function_zero_p (affine_fn fn)
1061 return (integer_zerop (affine_function_base (fn))
1062 && affine_function_constant_p (fn));
1065 /* Returns a signed integer type with the largest precision from TA
1069 signed_type_for_types (tree ta, tree tb)
1071 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1072 return signed_type_for (ta);
1074 return signed_type_for (tb);
1077 /* Applies operation OP on affine functions FNA and FNB, and returns the
1081 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1087 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1089 n = VEC_length (tree, fna);
1090 m = VEC_length (tree, fnb);
1094 n = VEC_length (tree, fnb);
1095 m = VEC_length (tree, fna);
1098 ret = VEC_alloc (tree, heap, m);
1099 for (i = 0; i < n; i++)
1101 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1102 TREE_TYPE (VEC_index (tree, fnb, i)));
1104 VEC_quick_push (tree, ret,
1105 fold_build2 (op, type,
1106 VEC_index (tree, fna, i),
1107 VEC_index (tree, fnb, i)));
1110 for (; VEC_iterate (tree, fna, i, coef); i++)
1111 VEC_quick_push (tree, ret,
1112 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1113 coef, integer_zero_node));
1114 for (; VEC_iterate (tree, fnb, i, coef); i++)
1115 VEC_quick_push (tree, ret,
1116 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1117 integer_zero_node, coef));
1122 /* Returns the sum of affine functions FNA and FNB. */
1125 affine_fn_plus (affine_fn fna, affine_fn fnb)
1127 return affine_fn_op (PLUS_EXPR, fna, fnb);
1130 /* Returns the difference of affine functions FNA and FNB. */
1133 affine_fn_minus (affine_fn fna, affine_fn fnb)
1135 return affine_fn_op (MINUS_EXPR, fna, fnb);
1138 /* Frees affine function FN. */
1141 affine_fn_free (affine_fn fn)
1143 VEC_free (tree, heap, fn);
1146 /* Determine for each subscript in the data dependence relation DDR
1150 compute_subscript_distance (struct data_dependence_relation *ddr)
1152 conflict_function *cf_a, *cf_b;
1153 affine_fn fn_a, fn_b, diff;
1155 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1159 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1161 struct subscript *subscript;
1163 subscript = DDR_SUBSCRIPT (ddr, i);
1164 cf_a = SUB_CONFLICTS_IN_A (subscript);
1165 cf_b = SUB_CONFLICTS_IN_B (subscript);
1167 fn_a = common_affine_function (cf_a);
1168 fn_b = common_affine_function (cf_b);
1171 SUB_DISTANCE (subscript) = chrec_dont_know;
1174 diff = affine_fn_minus (fn_a, fn_b);
1176 if (affine_function_constant_p (diff))
1177 SUB_DISTANCE (subscript) = affine_function_base (diff);
1179 SUB_DISTANCE (subscript) = chrec_dont_know;
1181 affine_fn_free (diff);
1186 /* Returns the conflict function for "unknown". */
1188 static conflict_function *
1189 conflict_fn_not_known (void)
1191 conflict_function *fn = XCNEW (conflict_function);
1197 /* Returns the conflict function for "independent". */
1199 static conflict_function *
1200 conflict_fn_no_dependence (void)
1202 conflict_function *fn = XCNEW (conflict_function);
1203 fn->n = NO_DEPENDENCE;
1208 /* Returns true if the address of OBJ is invariant in LOOP. */
1211 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1213 while (handled_component_p (obj))
1215 if (TREE_CODE (obj) == ARRAY_REF)
1217 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1218 need to check the stride and the lower bound of the reference. */
1219 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1221 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1225 else if (TREE_CODE (obj) == COMPONENT_REF)
1227 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1231 obj = TREE_OPERAND (obj, 0);
1234 if (!INDIRECT_REF_P (obj)
1235 && TREE_CODE (obj) != MEM_REF)
1238 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1242 /* Returns false if we can prove that data references A and B do not alias,
1246 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1248 tree addr_a = DR_BASE_OBJECT (a);
1249 tree addr_b = DR_BASE_OBJECT (b);
1251 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1252 return refs_output_dependent_p (addr_a, addr_b);
1253 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1254 return refs_anti_dependent_p (addr_a, addr_b);
1255 return refs_may_alias_p (addr_a, addr_b);
1258 static void compute_self_dependence (struct data_dependence_relation *);
1260 /* Initialize a data dependence relation between data accesses A and
1261 B. NB_LOOPS is the number of loops surrounding the references: the
1262 size of the classic distance/direction vectors. */
1264 static struct data_dependence_relation *
1265 initialize_data_dependence_relation (struct data_reference *a,
1266 struct data_reference *b,
1267 VEC (loop_p, heap) *loop_nest)
1269 struct data_dependence_relation *res;
1272 res = XNEW (struct data_dependence_relation);
1275 DDR_LOOP_NEST (res) = NULL;
1276 DDR_REVERSED_P (res) = false;
1277 DDR_SUBSCRIPTS (res) = NULL;
1278 DDR_DIR_VECTS (res) = NULL;
1279 DDR_DIST_VECTS (res) = NULL;
1281 if (a == NULL || b == NULL)
1283 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1287 /* If the data references do not alias, then they are independent. */
1288 if (!dr_may_alias_p (a, b))
1290 DDR_ARE_DEPENDENT (res) = chrec_known;
1294 /* When the references are exactly the same, don't spend time doing
1295 the data dependence tests, just initialize the ddr and return. */
1296 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1298 DDR_AFFINE_P (res) = true;
1299 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1300 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1301 DDR_LOOP_NEST (res) = loop_nest;
1302 DDR_INNER_LOOP (res) = 0;
1303 DDR_SELF_REFERENCE (res) = true;
1304 compute_self_dependence (res);
1308 /* If the references do not access the same object, we do not know
1309 whether they alias or not. */
1310 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1312 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1316 /* If the base of the object is not invariant in the loop nest, we cannot
1317 analyze it. TODO -- in fact, it would suffice to record that there may
1318 be arbitrary dependences in the loops where the base object varies. */
1320 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1321 DR_BASE_OBJECT (a)))
1323 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1327 /* If the number of dimensions of the access to not agree we can have
1328 a pointer access to a component of the array element type and an
1329 array access while the base-objects are still the same. Punt. */
1330 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1332 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1336 DDR_AFFINE_P (res) = true;
1337 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1338 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1339 DDR_LOOP_NEST (res) = loop_nest;
1340 DDR_INNER_LOOP (res) = 0;
1341 DDR_SELF_REFERENCE (res) = false;
1343 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1345 struct subscript *subscript;
1347 subscript = XNEW (struct subscript);
1348 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1349 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1350 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1351 SUB_DISTANCE (subscript) = chrec_dont_know;
1352 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1358 /* Frees memory used by the conflict function F. */
1361 free_conflict_function (conflict_function *f)
1365 if (CF_NONTRIVIAL_P (f))
1367 for (i = 0; i < f->n; i++)
1368 affine_fn_free (f->fns[i]);
1373 /* Frees memory used by SUBSCRIPTS. */
1376 free_subscripts (VEC (subscript_p, heap) *subscripts)
1381 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1383 free_conflict_function (s->conflicting_iterations_in_a);
1384 free_conflict_function (s->conflicting_iterations_in_b);
1387 VEC_free (subscript_p, heap, subscripts);
1390 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1394 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1397 if (dump_file && (dump_flags & TDF_DETAILS))
1399 fprintf (dump_file, "(dependence classified: ");
1400 print_generic_expr (dump_file, chrec, 0);
1401 fprintf (dump_file, ")\n");
1404 DDR_ARE_DEPENDENT (ddr) = chrec;
1405 free_subscripts (DDR_SUBSCRIPTS (ddr));
1406 DDR_SUBSCRIPTS (ddr) = NULL;
1409 /* The dependence relation DDR cannot be represented by a distance
1413 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1415 if (dump_file && (dump_flags & TDF_DETAILS))
1416 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1418 DDR_AFFINE_P (ddr) = false;
1423 /* This section contains the classic Banerjee tests. */
1425 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1426 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1429 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1431 return (evolution_function_is_constant_p (chrec_a)
1432 && evolution_function_is_constant_p (chrec_b));
1435 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1436 variable, i.e., if the SIV (Single Index Variable) test is true. */
1439 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1441 if ((evolution_function_is_constant_p (chrec_a)
1442 && evolution_function_is_univariate_p (chrec_b))
1443 || (evolution_function_is_constant_p (chrec_b)
1444 && evolution_function_is_univariate_p (chrec_a)))
1447 if (evolution_function_is_univariate_p (chrec_a)
1448 && evolution_function_is_univariate_p (chrec_b))
1450 switch (TREE_CODE (chrec_a))
1452 case POLYNOMIAL_CHREC:
1453 switch (TREE_CODE (chrec_b))
1455 case POLYNOMIAL_CHREC:
1456 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1471 /* Creates a conflict function with N dimensions. The affine functions
1472 in each dimension follow. */
1474 static conflict_function *
1475 conflict_fn (unsigned n, ...)
1478 conflict_function *ret = XCNEW (conflict_function);
1481 gcc_assert (0 < n && n <= MAX_DIM);
1485 for (i = 0; i < n; i++)
1486 ret->fns[i] = va_arg (ap, affine_fn);
1492 /* Returns constant affine function with value CST. */
1495 affine_fn_cst (tree cst)
1497 affine_fn fn = VEC_alloc (tree, heap, 1);
1498 VEC_quick_push (tree, fn, cst);
1502 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1505 affine_fn_univar (tree cst, unsigned dim, tree coef)
1507 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1510 gcc_assert (dim > 0);
1511 VEC_quick_push (tree, fn, cst);
1512 for (i = 1; i < dim; i++)
1513 VEC_quick_push (tree, fn, integer_zero_node);
1514 VEC_quick_push (tree, fn, coef);
1518 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1519 *OVERLAPS_B are initialized to the functions that describe the
1520 relation between the elements accessed twice by CHREC_A and
1521 CHREC_B. For k >= 0, the following property is verified:
1523 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1526 analyze_ziv_subscript (tree chrec_a,
1528 conflict_function **overlaps_a,
1529 conflict_function **overlaps_b,
1530 tree *last_conflicts)
1532 tree type, difference;
1533 dependence_stats.num_ziv++;
1535 if (dump_file && (dump_flags & TDF_DETAILS))
1536 fprintf (dump_file, "(analyze_ziv_subscript \n");
1538 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1539 chrec_a = chrec_convert (type, chrec_a, NULL);
1540 chrec_b = chrec_convert (type, chrec_b, NULL);
1541 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1543 switch (TREE_CODE (difference))
1546 if (integer_zerop (difference))
1548 /* The difference is equal to zero: the accessed index
1549 overlaps for each iteration in the loop. */
1550 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1551 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1552 *last_conflicts = chrec_dont_know;
1553 dependence_stats.num_ziv_dependent++;
1557 /* The accesses do not overlap. */
1558 *overlaps_a = conflict_fn_no_dependence ();
1559 *overlaps_b = conflict_fn_no_dependence ();
1560 *last_conflicts = integer_zero_node;
1561 dependence_stats.num_ziv_independent++;
1566 /* We're not sure whether the indexes overlap. For the moment,
1567 conservatively answer "don't know". */
1568 if (dump_file && (dump_flags & TDF_DETAILS))
1569 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1571 *overlaps_a = conflict_fn_not_known ();
1572 *overlaps_b = conflict_fn_not_known ();
1573 *last_conflicts = chrec_dont_know;
1574 dependence_stats.num_ziv_unimplemented++;
1578 if (dump_file && (dump_flags & TDF_DETAILS))
1579 fprintf (dump_file, ")\n");
1582 /* Sets NIT to the estimated number of executions of the statements in
1583 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1584 large as the number of iterations. If we have no reliable estimate,
1585 the function returns false, otherwise returns true. */
1588 estimated_loop_iterations (struct loop *loop, bool conservative,
1591 estimate_numbers_of_iterations_loop (loop, true);
1594 if (!loop->any_upper_bound)
1597 *nit = loop->nb_iterations_upper_bound;
1601 if (!loop->any_estimate)
1604 *nit = loop->nb_iterations_estimate;
1610 /* Similar to estimated_loop_iterations, but returns the estimate only
1611 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1612 on the number of iterations of LOOP could not be derived, returns -1. */
1615 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1618 HOST_WIDE_INT hwi_nit;
1620 if (!estimated_loop_iterations (loop, conservative, &nit))
1623 if (!double_int_fits_in_shwi_p (nit))
1625 hwi_nit = double_int_to_shwi (nit);
1627 return hwi_nit < 0 ? -1 : hwi_nit;
1630 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1631 and only if it fits to the int type. If this is not the case, or the
1632 estimate on the number of iterations of LOOP could not be derived, returns
1636 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1641 if (!estimated_loop_iterations (loop, conservative, &nit))
1642 return chrec_dont_know;
1644 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1645 if (!double_int_fits_to_tree_p (type, nit))
1646 return chrec_dont_know;
1648 return double_int_to_tree (type, nit);
1651 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1652 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1653 *OVERLAPS_B are initialized to the functions that describe the
1654 relation between the elements accessed twice by CHREC_A and
1655 CHREC_B. For k >= 0, the following property is verified:
1657 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1660 analyze_siv_subscript_cst_affine (tree chrec_a,
1662 conflict_function **overlaps_a,
1663 conflict_function **overlaps_b,
1664 tree *last_conflicts)
1666 bool value0, value1, value2;
1667 tree type, difference, tmp;
1669 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1670 chrec_a = chrec_convert (type, chrec_a, NULL);
1671 chrec_b = chrec_convert (type, chrec_b, NULL);
1672 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1674 if (!chrec_is_positive (initial_condition (difference), &value0))
1676 if (dump_file && (dump_flags & TDF_DETAILS))
1677 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1679 dependence_stats.num_siv_unimplemented++;
1680 *overlaps_a = conflict_fn_not_known ();
1681 *overlaps_b = conflict_fn_not_known ();
1682 *last_conflicts = chrec_dont_know;
1687 if (value0 == false)
1689 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1691 if (dump_file && (dump_flags & TDF_DETAILS))
1692 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1694 *overlaps_a = conflict_fn_not_known ();
1695 *overlaps_b = conflict_fn_not_known ();
1696 *last_conflicts = chrec_dont_know;
1697 dependence_stats.num_siv_unimplemented++;
1706 chrec_b = {10, +, 1}
1709 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1711 HOST_WIDE_INT numiter;
1712 struct loop *loop = get_chrec_loop (chrec_b);
1714 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1715 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1716 fold_build1 (ABS_EXPR, type, difference),
1717 CHREC_RIGHT (chrec_b));
1718 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1719 *last_conflicts = integer_one_node;
1722 /* Perform weak-zero siv test to see if overlap is
1723 outside the loop bounds. */
1724 numiter = estimated_loop_iterations_int (loop, false);
1727 && compare_tree_int (tmp, numiter) > 0)
1729 free_conflict_function (*overlaps_a);
1730 free_conflict_function (*overlaps_b);
1731 *overlaps_a = conflict_fn_no_dependence ();
1732 *overlaps_b = conflict_fn_no_dependence ();
1733 *last_conflicts = integer_zero_node;
1734 dependence_stats.num_siv_independent++;
1737 dependence_stats.num_siv_dependent++;
1741 /* When the step does not divide the difference, there are
1745 *overlaps_a = conflict_fn_no_dependence ();
1746 *overlaps_b = conflict_fn_no_dependence ();
1747 *last_conflicts = integer_zero_node;
1748 dependence_stats.num_siv_independent++;
1757 chrec_b = {10, +, -1}
1759 In this case, chrec_a will not overlap with chrec_b. */
1760 *overlaps_a = conflict_fn_no_dependence ();
1761 *overlaps_b = conflict_fn_no_dependence ();
1762 *last_conflicts = integer_zero_node;
1763 dependence_stats.num_siv_independent++;
1770 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1772 if (dump_file && (dump_flags & TDF_DETAILS))
1773 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1775 *overlaps_a = conflict_fn_not_known ();
1776 *overlaps_b = conflict_fn_not_known ();
1777 *last_conflicts = chrec_dont_know;
1778 dependence_stats.num_siv_unimplemented++;
1783 if (value2 == false)
1787 chrec_b = {10, +, -1}
1789 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1791 HOST_WIDE_INT numiter;
1792 struct loop *loop = get_chrec_loop (chrec_b);
1794 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1795 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1796 CHREC_RIGHT (chrec_b));
1797 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1798 *last_conflicts = integer_one_node;
1800 /* Perform weak-zero siv test to see if overlap is
1801 outside the loop bounds. */
1802 numiter = estimated_loop_iterations_int (loop, false);
1805 && compare_tree_int (tmp, numiter) > 0)
1807 free_conflict_function (*overlaps_a);
1808 free_conflict_function (*overlaps_b);
1809 *overlaps_a = conflict_fn_no_dependence ();
1810 *overlaps_b = conflict_fn_no_dependence ();
1811 *last_conflicts = integer_zero_node;
1812 dependence_stats.num_siv_independent++;
1815 dependence_stats.num_siv_dependent++;
1819 /* When the step does not divide the difference, there
1823 *overlaps_a = conflict_fn_no_dependence ();
1824 *overlaps_b = conflict_fn_no_dependence ();
1825 *last_conflicts = integer_zero_node;
1826 dependence_stats.num_siv_independent++;
1836 In this case, chrec_a will not overlap with chrec_b. */
1837 *overlaps_a = conflict_fn_no_dependence ();
1838 *overlaps_b = conflict_fn_no_dependence ();
1839 *last_conflicts = integer_zero_node;
1840 dependence_stats.num_siv_independent++;
1848 /* Helper recursive function for initializing the matrix A. Returns
1849 the initial value of CHREC. */
1852 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1856 switch (TREE_CODE (chrec))
1858 case POLYNOMIAL_CHREC:
1859 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1861 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1862 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1868 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1869 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1871 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1876 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1877 return chrec_convert (chrec_type (chrec), op, NULL);
1882 /* Handle ~X as -1 - X. */
1883 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1884 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1885 build_int_cst (TREE_TYPE (chrec), -1), op);
1897 #define FLOOR_DIV(x,y) ((x) / (y))
1899 /* Solves the special case of the Diophantine equation:
1900 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1902 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1903 number of iterations that loops X and Y run. The overlaps will be
1904 constructed as evolutions in dimension DIM. */
1907 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1908 affine_fn *overlaps_a,
1909 affine_fn *overlaps_b,
1910 tree *last_conflicts, int dim)
1912 if (((step_a > 0 && step_b > 0)
1913 || (step_a < 0 && step_b < 0)))
1915 int step_overlaps_a, step_overlaps_b;
1916 int gcd_steps_a_b, last_conflict, tau2;
1918 gcd_steps_a_b = gcd (step_a, step_b);
1919 step_overlaps_a = step_b / gcd_steps_a_b;
1920 step_overlaps_b = step_a / gcd_steps_a_b;
1924 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1925 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1926 last_conflict = tau2;
1927 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1930 *last_conflicts = chrec_dont_know;
1932 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1933 build_int_cst (NULL_TREE,
1935 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1936 build_int_cst (NULL_TREE,
1942 *overlaps_a = affine_fn_cst (integer_zero_node);
1943 *overlaps_b = affine_fn_cst (integer_zero_node);
1944 *last_conflicts = integer_zero_node;
1948 /* Solves the special case of a Diophantine equation where CHREC_A is
1949 an affine bivariate function, and CHREC_B is an affine univariate
1950 function. For example,
1952 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1954 has the following overlapping functions:
1956 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1957 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1958 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1960 FORNOW: This is a specialized implementation for a case occurring in
1961 a common benchmark. Implement the general algorithm. */
1964 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1965 conflict_function **overlaps_a,
1966 conflict_function **overlaps_b,
1967 tree *last_conflicts)
1969 bool xz_p, yz_p, xyz_p;
1970 int step_x, step_y, step_z;
1971 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1972 affine_fn overlaps_a_xz, overlaps_b_xz;
1973 affine_fn overlaps_a_yz, overlaps_b_yz;
1974 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1975 affine_fn ova1, ova2, ovb;
1976 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1978 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1979 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1980 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1983 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
1985 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
1986 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
1988 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1990 if (dump_file && (dump_flags & TDF_DETAILS))
1991 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1993 *overlaps_a = conflict_fn_not_known ();
1994 *overlaps_b = conflict_fn_not_known ();
1995 *last_conflicts = chrec_dont_know;
1999 niter = MIN (niter_x, niter_z);
2000 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2003 &last_conflicts_xz, 1);
2004 niter = MIN (niter_y, niter_z);
2005 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2008 &last_conflicts_yz, 2);
2009 niter = MIN (niter_x, niter_z);
2010 niter = MIN (niter_y, niter);
2011 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2014 &last_conflicts_xyz, 3);
2016 xz_p = !integer_zerop (last_conflicts_xz);
2017 yz_p = !integer_zerop (last_conflicts_yz);
2018 xyz_p = !integer_zerop (last_conflicts_xyz);
2020 if (xz_p || yz_p || xyz_p)
2022 ova1 = affine_fn_cst (integer_zero_node);
2023 ova2 = affine_fn_cst (integer_zero_node);
2024 ovb = affine_fn_cst (integer_zero_node);
2027 affine_fn t0 = ova1;
2030 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2031 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2032 affine_fn_free (t0);
2033 affine_fn_free (t2);
2034 *last_conflicts = last_conflicts_xz;
2038 affine_fn t0 = ova2;
2041 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2042 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2043 affine_fn_free (t0);
2044 affine_fn_free (t2);
2045 *last_conflicts = last_conflicts_yz;
2049 affine_fn t0 = ova1;
2050 affine_fn t2 = ova2;
2053 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2054 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2055 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2056 affine_fn_free (t0);
2057 affine_fn_free (t2);
2058 affine_fn_free (t4);
2059 *last_conflicts = last_conflicts_xyz;
2061 *overlaps_a = conflict_fn (2, ova1, ova2);
2062 *overlaps_b = conflict_fn (1, ovb);
2066 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2067 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2068 *last_conflicts = integer_zero_node;
2071 affine_fn_free (overlaps_a_xz);
2072 affine_fn_free (overlaps_b_xz);
2073 affine_fn_free (overlaps_a_yz);
2074 affine_fn_free (overlaps_b_yz);
2075 affine_fn_free (overlaps_a_xyz);
2076 affine_fn_free (overlaps_b_xyz);
2079 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2082 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2085 memcpy (vec2, vec1, size * sizeof (*vec1));
2088 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2091 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2096 for (i = 0; i < m; i++)
2097 lambda_vector_copy (mat1[i], mat2[i], n);
2100 /* Store the N x N identity matrix in MAT. */
2103 lambda_matrix_id (lambda_matrix mat, int size)
2107 for (i = 0; i < size; i++)
2108 for (j = 0; j < size; j++)
2109 mat[i][j] = (i == j) ? 1 : 0;
2112 /* Return the first nonzero element of vector VEC1 between START and N.
2113 We must have START <= N. Returns N if VEC1 is the zero vector. */
2116 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2119 while (j < n && vec1[j] == 0)
2124 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2125 R2 = R2 + CONST1 * R1. */
2128 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2135 for (i = 0; i < n; i++)
2136 mat[r2][i] += const1 * mat[r1][i];
2139 /* Swap rows R1 and R2 in matrix MAT. */
2142 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2151 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2152 and store the result in VEC2. */
2155 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2156 int size, int const1)
2161 lambda_vector_clear (vec2, size);
2163 for (i = 0; i < size; i++)
2164 vec2[i] = const1 * vec1[i];
2167 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2170 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2173 lambda_vector_mult_const (vec1, vec2, size, -1);
2176 /* Negate row R1 of matrix MAT which has N columns. */
2179 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2181 lambda_vector_negate (mat[r1], mat[r1], n);
2184 /* Return true if two vectors are equal. */
2187 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2190 for (i = 0; i < size; i++)
2191 if (vec1[i] != vec2[i])
2196 /* Given an M x N integer matrix A, this function determines an M x
2197 M unimodular matrix U, and an M x N echelon matrix S such that
2198 "U.A = S". This decomposition is also known as "right Hermite".
2200 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2201 Restructuring Compilers" Utpal Banerjee. */
2204 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2205 lambda_matrix S, lambda_matrix U)
2209 lambda_matrix_copy (A, S, m, n);
2210 lambda_matrix_id (U, m);
2212 for (j = 0; j < n; j++)
2214 if (lambda_vector_first_nz (S[j], m, i0) < m)
2217 for (i = m - 1; i >= i0; i--)
2219 while (S[i][j] != 0)
2221 int sigma, factor, a, b;
2225 sigma = (a * b < 0) ? -1: 1;
2228 factor = sigma * (a / b);
2230 lambda_matrix_row_add (S, n, i, i-1, -factor);
2231 lambda_matrix_row_exchange (S, i, i-1);
2233 lambda_matrix_row_add (U, m, i, i-1, -factor);
2234 lambda_matrix_row_exchange (U, i, i-1);
2241 /* Determines the overlapping elements due to accesses CHREC_A and
2242 CHREC_B, that are affine functions. This function cannot handle
2243 symbolic evolution functions, ie. when initial conditions are
2244 parameters, because it uses lambda matrices of integers. */
2247 analyze_subscript_affine_affine (tree chrec_a,
2249 conflict_function **overlaps_a,
2250 conflict_function **overlaps_b,
2251 tree *last_conflicts)
2253 unsigned nb_vars_a, nb_vars_b, dim;
2254 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2255 lambda_matrix A, U, S;
2256 struct obstack scratch_obstack;
2258 if (eq_evolutions_p (chrec_a, chrec_b))
2260 /* The accessed index overlaps for each iteration in the
2262 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2263 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2264 *last_conflicts = chrec_dont_know;
2267 if (dump_file && (dump_flags & TDF_DETAILS))
2268 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2270 /* For determining the initial intersection, we have to solve a
2271 Diophantine equation. This is the most time consuming part.
2273 For answering to the question: "Is there a dependence?" we have
2274 to prove that there exists a solution to the Diophantine
2275 equation, and that the solution is in the iteration domain,
2276 i.e. the solution is positive or zero, and that the solution
2277 happens before the upper bound loop.nb_iterations. Otherwise
2278 there is no dependence. This function outputs a description of
2279 the iterations that hold the intersections. */
2281 nb_vars_a = nb_vars_in_chrec (chrec_a);
2282 nb_vars_b = nb_vars_in_chrec (chrec_b);
2284 gcc_obstack_init (&scratch_obstack);
2286 dim = nb_vars_a + nb_vars_b;
2287 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2288 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2289 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2291 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2292 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2293 gamma = init_b - init_a;
2295 /* Don't do all the hard work of solving the Diophantine equation
2296 when we already know the solution: for example,
2299 | gamma = 3 - 3 = 0.
2300 Then the first overlap occurs during the first iterations:
2301 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2305 if (nb_vars_a == 1 && nb_vars_b == 1)
2307 HOST_WIDE_INT step_a, step_b;
2308 HOST_WIDE_INT niter, niter_a, niter_b;
2311 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2313 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2315 niter = MIN (niter_a, niter_b);
2316 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2317 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2319 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2322 *overlaps_a = conflict_fn (1, ova);
2323 *overlaps_b = conflict_fn (1, ovb);
2326 else if (nb_vars_a == 2 && nb_vars_b == 1)
2327 compute_overlap_steps_for_affine_1_2
2328 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2330 else if (nb_vars_a == 1 && nb_vars_b == 2)
2331 compute_overlap_steps_for_affine_1_2
2332 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2336 if (dump_file && (dump_flags & TDF_DETAILS))
2337 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2338 *overlaps_a = conflict_fn_not_known ();
2339 *overlaps_b = conflict_fn_not_known ();
2340 *last_conflicts = chrec_dont_know;
2342 goto end_analyze_subs_aa;
2346 lambda_matrix_right_hermite (A, dim, 1, S, U);
2351 lambda_matrix_row_negate (U, dim, 0);
2353 gcd_alpha_beta = S[0][0];
2355 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2356 but that is a quite strange case. Instead of ICEing, answer
2358 if (gcd_alpha_beta == 0)
2360 *overlaps_a = conflict_fn_not_known ();
2361 *overlaps_b = conflict_fn_not_known ();
2362 *last_conflicts = chrec_dont_know;
2363 goto end_analyze_subs_aa;
2366 /* The classic "gcd-test". */
2367 if (!int_divides_p (gcd_alpha_beta, gamma))
2369 /* The "gcd-test" has determined that there is no integer
2370 solution, i.e. there is no dependence. */
2371 *overlaps_a = conflict_fn_no_dependence ();
2372 *overlaps_b = conflict_fn_no_dependence ();
2373 *last_conflicts = integer_zero_node;
2376 /* Both access functions are univariate. This includes SIV and MIV cases. */
2377 else if (nb_vars_a == 1 && nb_vars_b == 1)
2379 /* Both functions should have the same evolution sign. */
2380 if (((A[0][0] > 0 && -A[1][0] > 0)
2381 || (A[0][0] < 0 && -A[1][0] < 0)))
2383 /* The solutions are given by:
2385 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2388 For a given integer t. Using the following variables,
2390 | i0 = u11 * gamma / gcd_alpha_beta
2391 | j0 = u12 * gamma / gcd_alpha_beta
2398 | y0 = j0 + j1 * t. */
2399 HOST_WIDE_INT i0, j0, i1, j1;
2401 i0 = U[0][0] * gamma / gcd_alpha_beta;
2402 j0 = U[0][1] * gamma / gcd_alpha_beta;
2406 if ((i1 == 0 && i0 < 0)
2407 || (j1 == 0 && j0 < 0))
2409 /* There is no solution.
2410 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2411 falls in here, but for the moment we don't look at the
2412 upper bound of the iteration domain. */
2413 *overlaps_a = conflict_fn_no_dependence ();
2414 *overlaps_b = conflict_fn_no_dependence ();
2415 *last_conflicts = integer_zero_node;
2416 goto end_analyze_subs_aa;
2419 if (i1 > 0 && j1 > 0)
2421 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2422 (get_chrec_loop (chrec_a), false);
2423 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2424 (get_chrec_loop (chrec_b), false);
2425 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2427 /* (X0, Y0) is a solution of the Diophantine equation:
2428 "chrec_a (X0) = chrec_b (Y0)". */
2429 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2431 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2432 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2434 /* (X1, Y1) is the smallest positive solution of the eq
2435 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2436 first conflict occurs. */
2437 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2438 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2439 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2443 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2444 FLOOR_DIV (niter - j0, j1));
2445 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2447 /* If the overlap occurs outside of the bounds of the
2448 loop, there is no dependence. */
2449 if (x1 >= niter || y1 >= niter)
2451 *overlaps_a = conflict_fn_no_dependence ();
2452 *overlaps_b = conflict_fn_no_dependence ();
2453 *last_conflicts = integer_zero_node;
2454 goto end_analyze_subs_aa;
2457 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2460 *last_conflicts = chrec_dont_know;
2464 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2466 build_int_cst (NULL_TREE, i1)));
2469 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2471 build_int_cst (NULL_TREE, j1)));
2475 /* FIXME: For the moment, the upper bound of the
2476 iteration domain for i and j is not checked. */
2477 if (dump_file && (dump_flags & TDF_DETAILS))
2478 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2479 *overlaps_a = conflict_fn_not_known ();
2480 *overlaps_b = conflict_fn_not_known ();
2481 *last_conflicts = chrec_dont_know;
2486 if (dump_file && (dump_flags & TDF_DETAILS))
2487 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2488 *overlaps_a = conflict_fn_not_known ();
2489 *overlaps_b = conflict_fn_not_known ();
2490 *last_conflicts = chrec_dont_know;
2495 if (dump_file && (dump_flags & TDF_DETAILS))
2496 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2497 *overlaps_a = conflict_fn_not_known ();
2498 *overlaps_b = conflict_fn_not_known ();
2499 *last_conflicts = chrec_dont_know;
2502 end_analyze_subs_aa:
2503 obstack_free (&scratch_obstack, NULL);
2504 if (dump_file && (dump_flags & TDF_DETAILS))
2506 fprintf (dump_file, " (overlaps_a = ");
2507 dump_conflict_function (dump_file, *overlaps_a);
2508 fprintf (dump_file, ")\n (overlaps_b = ");
2509 dump_conflict_function (dump_file, *overlaps_b);
2510 fprintf (dump_file, ")\n");
2511 fprintf (dump_file, ")\n");
2515 /* Returns true when analyze_subscript_affine_affine can be used for
2516 determining the dependence relation between chrec_a and chrec_b,
2517 that contain symbols. This function modifies chrec_a and chrec_b
2518 such that the analysis result is the same, and such that they don't
2519 contain symbols, and then can safely be passed to the analyzer.
2521 Example: The analysis of the following tuples of evolutions produce
2522 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2525 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2526 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2530 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2532 tree diff, type, left_a, left_b, right_b;
2534 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2535 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2536 /* FIXME: For the moment not handled. Might be refined later. */
2539 type = chrec_type (*chrec_a);
2540 left_a = CHREC_LEFT (*chrec_a);
2541 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2542 diff = chrec_fold_minus (type, left_a, left_b);
2544 if (!evolution_function_is_constant_p (diff))
2547 if (dump_file && (dump_flags & TDF_DETAILS))
2548 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2550 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2551 diff, CHREC_RIGHT (*chrec_a));
2552 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2553 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2554 build_int_cst (type, 0),
2559 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2560 *OVERLAPS_B are initialized to the functions that describe the
2561 relation between the elements accessed twice by CHREC_A and
2562 CHREC_B. For k >= 0, the following property is verified:
2564 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2567 analyze_siv_subscript (tree chrec_a,
2569 conflict_function **overlaps_a,
2570 conflict_function **overlaps_b,
2571 tree *last_conflicts,
2574 dependence_stats.num_siv++;
2576 if (dump_file && (dump_flags & TDF_DETAILS))
2577 fprintf (dump_file, "(analyze_siv_subscript \n");
2579 if (evolution_function_is_constant_p (chrec_a)
2580 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2581 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2582 overlaps_a, overlaps_b, last_conflicts);
2584 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2585 && evolution_function_is_constant_p (chrec_b))
2586 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2587 overlaps_b, overlaps_a, last_conflicts);
2589 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2590 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2592 if (!chrec_contains_symbols (chrec_a)
2593 && !chrec_contains_symbols (chrec_b))
2595 analyze_subscript_affine_affine (chrec_a, chrec_b,
2596 overlaps_a, overlaps_b,
2599 if (CF_NOT_KNOWN_P (*overlaps_a)
2600 || CF_NOT_KNOWN_P (*overlaps_b))
2601 dependence_stats.num_siv_unimplemented++;
2602 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2603 || CF_NO_DEPENDENCE_P (*overlaps_b))
2604 dependence_stats.num_siv_independent++;
2606 dependence_stats.num_siv_dependent++;
2608 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2611 analyze_subscript_affine_affine (chrec_a, chrec_b,
2612 overlaps_a, overlaps_b,
2615 if (CF_NOT_KNOWN_P (*overlaps_a)
2616 || CF_NOT_KNOWN_P (*overlaps_b))
2617 dependence_stats.num_siv_unimplemented++;
2618 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2619 || CF_NO_DEPENDENCE_P (*overlaps_b))
2620 dependence_stats.num_siv_independent++;
2622 dependence_stats.num_siv_dependent++;
2625 goto siv_subscript_dontknow;
2630 siv_subscript_dontknow:;
2631 if (dump_file && (dump_flags & TDF_DETAILS))
2632 fprintf (dump_file, "siv test failed: unimplemented.\n");
2633 *overlaps_a = conflict_fn_not_known ();
2634 *overlaps_b = conflict_fn_not_known ();
2635 *last_conflicts = chrec_dont_know;
2636 dependence_stats.num_siv_unimplemented++;
2639 if (dump_file && (dump_flags & TDF_DETAILS))
2640 fprintf (dump_file, ")\n");
2643 /* Returns false if we can prove that the greatest common divisor of the steps
2644 of CHREC does not divide CST, false otherwise. */
2647 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2649 HOST_WIDE_INT cd = 0, val;
2652 if (!host_integerp (cst, 0))
2654 val = tree_low_cst (cst, 0);
2656 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2658 step = CHREC_RIGHT (chrec);
2659 if (!host_integerp (step, 0))
2661 cd = gcd (cd, tree_low_cst (step, 0));
2662 chrec = CHREC_LEFT (chrec);
2665 return val % cd == 0;
2668 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2669 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2670 functions that describe the relation between the elements accessed
2671 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2674 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2677 analyze_miv_subscript (tree chrec_a,
2679 conflict_function **overlaps_a,
2680 conflict_function **overlaps_b,
2681 tree *last_conflicts,
2682 struct loop *loop_nest)
2684 /* FIXME: This is a MIV subscript, not yet handled.
2685 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2688 In the SIV test we had to solve a Diophantine equation with two
2689 variables. In the MIV case we have to solve a Diophantine
2690 equation with 2*n variables (if the subscript uses n IVs).
2692 tree type, difference;
2694 dependence_stats.num_miv++;
2695 if (dump_file && (dump_flags & TDF_DETAILS))
2696 fprintf (dump_file, "(analyze_miv_subscript \n");
2698 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2699 chrec_a = chrec_convert (type, chrec_a, NULL);
2700 chrec_b = chrec_convert (type, chrec_b, NULL);
2701 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2703 if (eq_evolutions_p (chrec_a, chrec_b))
2705 /* Access functions are the same: all the elements are accessed
2706 in the same order. */
2707 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2708 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2709 *last_conflicts = estimated_loop_iterations_tree
2710 (get_chrec_loop (chrec_a), true);
2711 dependence_stats.num_miv_dependent++;
2714 else if (evolution_function_is_constant_p (difference)
2715 /* For the moment, the following is verified:
2716 evolution_function_is_affine_multivariate_p (chrec_a,
2718 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2720 /* testsuite/.../ssa-chrec-33.c
2721 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2723 The difference is 1, and all the evolution steps are multiples
2724 of 2, consequently there are no overlapping elements. */
2725 *overlaps_a = conflict_fn_no_dependence ();
2726 *overlaps_b = conflict_fn_no_dependence ();
2727 *last_conflicts = integer_zero_node;
2728 dependence_stats.num_miv_independent++;
2731 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2732 && !chrec_contains_symbols (chrec_a)
2733 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2734 && !chrec_contains_symbols (chrec_b))
2736 /* testsuite/.../ssa-chrec-35.c
2737 {0, +, 1}_2 vs. {0, +, 1}_3
2738 the overlapping elements are respectively located at iterations:
2739 {0, +, 1}_x and {0, +, 1}_x,
2740 in other words, we have the equality:
2741 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2744 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2745 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2747 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2748 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2750 analyze_subscript_affine_affine (chrec_a, chrec_b,
2751 overlaps_a, overlaps_b, last_conflicts);
2753 if (CF_NOT_KNOWN_P (*overlaps_a)
2754 || CF_NOT_KNOWN_P (*overlaps_b))
2755 dependence_stats.num_miv_unimplemented++;
2756 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2757 || CF_NO_DEPENDENCE_P (*overlaps_b))
2758 dependence_stats.num_miv_independent++;
2760 dependence_stats.num_miv_dependent++;
2765 /* When the analysis is too difficult, answer "don't know". */
2766 if (dump_file && (dump_flags & TDF_DETAILS))
2767 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2769 *overlaps_a = conflict_fn_not_known ();
2770 *overlaps_b = conflict_fn_not_known ();
2771 *last_conflicts = chrec_dont_know;
2772 dependence_stats.num_miv_unimplemented++;
2775 if (dump_file && (dump_flags & TDF_DETAILS))
2776 fprintf (dump_file, ")\n");
2779 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2780 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2781 OVERLAP_ITERATIONS_B are initialized with two functions that
2782 describe the iterations that contain conflicting elements.
2784 Remark: For an integer k >= 0, the following equality is true:
2786 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2790 analyze_overlapping_iterations (tree chrec_a,
2792 conflict_function **overlap_iterations_a,
2793 conflict_function **overlap_iterations_b,
2794 tree *last_conflicts, struct loop *loop_nest)
2796 unsigned int lnn = loop_nest->num;
2798 dependence_stats.num_subscript_tests++;
2800 if (dump_file && (dump_flags & TDF_DETAILS))
2802 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2803 fprintf (dump_file, " (chrec_a = ");
2804 print_generic_expr (dump_file, chrec_a, 0);
2805 fprintf (dump_file, ")\n (chrec_b = ");
2806 print_generic_expr (dump_file, chrec_b, 0);
2807 fprintf (dump_file, ")\n");
2810 if (chrec_a == NULL_TREE
2811 || chrec_b == NULL_TREE
2812 || chrec_contains_undetermined (chrec_a)
2813 || chrec_contains_undetermined (chrec_b))
2815 dependence_stats.num_subscript_undetermined++;
2817 *overlap_iterations_a = conflict_fn_not_known ();
2818 *overlap_iterations_b = conflict_fn_not_known ();
2821 /* If they are the same chrec, and are affine, they overlap
2822 on every iteration. */
2823 else if (eq_evolutions_p (chrec_a, chrec_b)
2824 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2825 || operand_equal_p (chrec_a, chrec_b, 0)))
2827 dependence_stats.num_same_subscript_function++;
2828 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2829 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2830 *last_conflicts = chrec_dont_know;
2833 /* If they aren't the same, and aren't affine, we can't do anything
2835 else if ((chrec_contains_symbols (chrec_a)
2836 || chrec_contains_symbols (chrec_b))
2837 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2838 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2840 dependence_stats.num_subscript_undetermined++;
2841 *overlap_iterations_a = conflict_fn_not_known ();
2842 *overlap_iterations_b = conflict_fn_not_known ();
2845 else if (ziv_subscript_p (chrec_a, chrec_b))
2846 analyze_ziv_subscript (chrec_a, chrec_b,
2847 overlap_iterations_a, overlap_iterations_b,
2850 else if (siv_subscript_p (chrec_a, chrec_b))
2851 analyze_siv_subscript (chrec_a, chrec_b,
2852 overlap_iterations_a, overlap_iterations_b,
2853 last_conflicts, lnn);
2856 analyze_miv_subscript (chrec_a, chrec_b,
2857 overlap_iterations_a, overlap_iterations_b,
2858 last_conflicts, loop_nest);
2860 if (dump_file && (dump_flags & TDF_DETAILS))
2862 fprintf (dump_file, " (overlap_iterations_a = ");
2863 dump_conflict_function (dump_file, *overlap_iterations_a);
2864 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2865 dump_conflict_function (dump_file, *overlap_iterations_b);
2866 fprintf (dump_file, ")\n");
2867 fprintf (dump_file, ")\n");
2871 /* Helper function for uniquely inserting distance vectors. */
2874 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2879 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2880 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2883 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2886 /* Helper function for uniquely inserting direction vectors. */
2889 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2894 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2895 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2898 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2901 /* Add a distance of 1 on all the loops outer than INDEX. If we
2902 haven't yet determined a distance for this outer loop, push a new
2903 distance vector composed of the previous distance, and a distance
2904 of 1 for this outer loop. Example:
2912 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2913 save (0, 1), then we have to save (1, 0). */
2916 add_outer_distances (struct data_dependence_relation *ddr,
2917 lambda_vector dist_v, int index)
2919 /* For each outer loop where init_v is not set, the accesses are
2920 in dependence of distance 1 in the loop. */
2921 while (--index >= 0)
2923 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2924 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2926 save_dist_v (ddr, save_v);
2930 /* Return false when fail to represent the data dependence as a
2931 distance vector. INIT_B is set to true when a component has been
2932 added to the distance vector DIST_V. INDEX_CARRY is then set to
2933 the index in DIST_V that carries the dependence. */
2936 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2937 struct data_reference *ddr_a,
2938 struct data_reference *ddr_b,
2939 lambda_vector dist_v, bool *init_b,
2943 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2945 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2947 tree access_fn_a, access_fn_b;
2948 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2950 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2952 non_affine_dependence_relation (ddr);
2956 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2957 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2959 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2960 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2963 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2964 DDR_LOOP_NEST (ddr));
2965 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2966 DDR_LOOP_NEST (ddr));
2968 /* The dependence is carried by the outermost loop. Example:
2975 In this case, the dependence is carried by loop_1. */
2976 index = index_a < index_b ? index_a : index_b;
2977 *index_carry = MIN (index, *index_carry);
2979 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2981 non_affine_dependence_relation (ddr);
2985 dist = int_cst_value (SUB_DISTANCE (subscript));
2987 /* This is the subscript coupling test. If we have already
2988 recorded a distance for this loop (a distance coming from
2989 another subscript), it should be the same. For example,
2990 in the following code, there is no dependence:
2997 if (init_v[index] != 0 && dist_v[index] != dist)
2999 finalize_ddr_dependent (ddr, chrec_known);
3003 dist_v[index] = dist;
3007 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3009 /* This can be for example an affine vs. constant dependence
3010 (T[i] vs. T[3]) that is not an affine dependence and is
3011 not representable as a distance vector. */
3012 non_affine_dependence_relation (ddr);
3020 /* Return true when the DDR contains only constant access functions. */
3023 constant_access_functions (const struct data_dependence_relation *ddr)
3027 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3028 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3029 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3035 /* Helper function for the case where DDR_A and DDR_B are the same
3036 multivariate access function with a constant step. For an example
3040 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3043 tree c_1 = CHREC_LEFT (c_2);
3044 tree c_0 = CHREC_LEFT (c_1);
3045 lambda_vector dist_v;
3048 /* Polynomials with more than 2 variables are not handled yet. When
3049 the evolution steps are parameters, it is not possible to
3050 represent the dependence using classical distance vectors. */
3051 if (TREE_CODE (c_0) != INTEGER_CST
3052 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3053 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3055 DDR_AFFINE_P (ddr) = false;
3059 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3060 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3062 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3063 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3064 v1 = int_cst_value (CHREC_RIGHT (c_1));
3065 v2 = int_cst_value (CHREC_RIGHT (c_2));
3078 save_dist_v (ddr, dist_v);
3080 add_outer_distances (ddr, dist_v, x_1);
3083 /* Helper function for the case where DDR_A and DDR_B are the same
3084 access functions. */
3087 add_other_self_distances (struct data_dependence_relation *ddr)
3089 lambda_vector dist_v;
3091 int index_carry = DDR_NB_LOOPS (ddr);
3093 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3095 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3097 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3099 if (!evolution_function_is_univariate_p (access_fun))
3101 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3103 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3107 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3109 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3110 add_multivariate_self_dist (ddr, access_fun);
3112 /* The evolution step is not constant: it varies in
3113 the outer loop, so this cannot be represented by a
3114 distance vector. For example in pr34635.c the
3115 evolution is {0, +, {0, +, 4}_1}_2. */
3116 DDR_AFFINE_P (ddr) = false;
3121 index_carry = MIN (index_carry,
3122 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3123 DDR_LOOP_NEST (ddr)));
3127 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3128 add_outer_distances (ddr, dist_v, index_carry);
3132 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3134 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3136 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3137 save_dist_v (ddr, dist_v);
3140 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3141 is the case for example when access functions are the same and
3142 equal to a constant, as in:
3149 in which case the distance vectors are (0) and (1). */
3152 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3156 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3158 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3159 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3160 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3162 for (j = 0; j < ca->n; j++)
3163 if (affine_function_zero_p (ca->fns[j]))
3165 insert_innermost_unit_dist_vector (ddr);
3169 for (j = 0; j < cb->n; j++)
3170 if (affine_function_zero_p (cb->fns[j]))
3172 insert_innermost_unit_dist_vector (ddr);
3178 /* Compute the classic per loop distance vector. DDR is the data
3179 dependence relation to build a vector from. Return false when fail
3180 to represent the data dependence as a distance vector. */
3183 build_classic_dist_vector (struct data_dependence_relation *ddr,
3184 struct loop *loop_nest)
3186 bool init_b = false;
3187 int index_carry = DDR_NB_LOOPS (ddr);
3188 lambda_vector dist_v;
3190 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3193 if (same_access_functions (ddr))
3195 /* Save the 0 vector. */
3196 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3197 save_dist_v (ddr, dist_v);
3199 if (constant_access_functions (ddr))
3200 add_distance_for_zero_overlaps (ddr);
3202 if (DDR_NB_LOOPS (ddr) > 1)
3203 add_other_self_distances (ddr);
3208 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3209 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3210 dist_v, &init_b, &index_carry))
3213 /* Save the distance vector if we initialized one. */
3216 /* Verify a basic constraint: classic distance vectors should
3217 always be lexicographically positive.
3219 Data references are collected in the order of execution of
3220 the program, thus for the following loop
3222 | for (i = 1; i < 100; i++)
3223 | for (j = 1; j < 100; j++)
3225 | t = T[j+1][i-1]; // A
3226 | T[j][i] = t + 2; // B
3229 references are collected following the direction of the wind:
3230 A then B. The data dependence tests are performed also
3231 following this order, such that we're looking at the distance
3232 separating the elements accessed by A from the elements later
3233 accessed by B. But in this example, the distance returned by
3234 test_dep (A, B) is lexicographically negative (-1, 1), that
3235 means that the access A occurs later than B with respect to
3236 the outer loop, ie. we're actually looking upwind. In this
3237 case we solve test_dep (B, A) looking downwind to the
3238 lexicographically positive solution, that returns the
3239 distance vector (1, -1). */
3240 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3242 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3243 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3246 compute_subscript_distance (ddr);
3247 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3248 save_v, &init_b, &index_carry))
3250 save_dist_v (ddr, save_v);
3251 DDR_REVERSED_P (ddr) = true;
3253 /* In this case there is a dependence forward for all the
3256 | for (k = 1; k < 100; k++)
3257 | for (i = 1; i < 100; i++)
3258 | for (j = 1; j < 100; j++)
3260 | t = T[j+1][i-1]; // A
3261 | T[j][i] = t + 2; // B
3269 if (DDR_NB_LOOPS (ddr) > 1)
3271 add_outer_distances (ddr, save_v, index_carry);
3272 add_outer_distances (ddr, dist_v, index_carry);
3277 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3278 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3280 if (DDR_NB_LOOPS (ddr) > 1)
3282 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3284 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3285 DDR_A (ddr), loop_nest))
3287 compute_subscript_distance (ddr);
3288 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3289 opposite_v, &init_b,
3293 save_dist_v (ddr, save_v);
3294 add_outer_distances (ddr, dist_v, index_carry);
3295 add_outer_distances (ddr, opposite_v, index_carry);
3298 save_dist_v (ddr, save_v);
3303 /* There is a distance of 1 on all the outer loops: Example:
3304 there is a dependence of distance 1 on loop_1 for the array A.
3310 add_outer_distances (ddr, dist_v,
3311 lambda_vector_first_nz (dist_v,
3312 DDR_NB_LOOPS (ddr), 0));
3315 if (dump_file && (dump_flags & TDF_DETAILS))
3319 fprintf (dump_file, "(build_classic_dist_vector\n");
3320 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3322 fprintf (dump_file, " dist_vector = (");
3323 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3324 DDR_NB_LOOPS (ddr));
3325 fprintf (dump_file, " )\n");
3327 fprintf (dump_file, ")\n");
3333 /* Return the direction for a given distance.
3334 FIXME: Computing dir this way is suboptimal, since dir can catch
3335 cases that dist is unable to represent. */
3337 static inline enum data_dependence_direction
3338 dir_from_dist (int dist)
3341 return dir_positive;
3343 return dir_negative;
3348 /* Compute the classic per loop direction vector. DDR is the data
3349 dependence relation to build a vector from. */
3352 build_classic_dir_vector (struct data_dependence_relation *ddr)
3355 lambda_vector dist_v;
3357 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3359 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3361 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3362 dir_v[j] = dir_from_dist (dist_v[j]);
3364 save_dir_v (ddr, dir_v);
3368 /* Helper function. Returns true when there is a dependence between
3369 data references DRA and DRB. */
3372 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3373 struct data_reference *dra,
3374 struct data_reference *drb,
3375 struct loop *loop_nest)
3378 tree last_conflicts;
3379 struct subscript *subscript;
3381 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3384 conflict_function *overlaps_a, *overlaps_b;
3386 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3387 DR_ACCESS_FN (drb, i),
3388 &overlaps_a, &overlaps_b,
3389 &last_conflicts, loop_nest);
3391 if (CF_NOT_KNOWN_P (overlaps_a)
3392 || CF_NOT_KNOWN_P (overlaps_b))
3394 finalize_ddr_dependent (ddr, chrec_dont_know);
3395 dependence_stats.num_dependence_undetermined++;
3396 free_conflict_function (overlaps_a);
3397 free_conflict_function (overlaps_b);
3401 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3402 || CF_NO_DEPENDENCE_P (overlaps_b))
3404 finalize_ddr_dependent (ddr, chrec_known);
3405 dependence_stats.num_dependence_independent++;
3406 free_conflict_function (overlaps_a);
3407 free_conflict_function (overlaps_b);
3413 if (SUB_CONFLICTS_IN_A (subscript))
3414 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3415 if (SUB_CONFLICTS_IN_B (subscript))
3416 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3418 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3419 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3420 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3427 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3430 subscript_dependence_tester (struct data_dependence_relation *ddr,
3431 struct loop *loop_nest)
3434 if (dump_file && (dump_flags & TDF_DETAILS))
3435 fprintf (dump_file, "(subscript_dependence_tester \n");
3437 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3438 dependence_stats.num_dependence_dependent++;
3440 compute_subscript_distance (ddr);
3441 if (build_classic_dist_vector (ddr, loop_nest))
3442 build_classic_dir_vector (ddr);
3444 if (dump_file && (dump_flags & TDF_DETAILS))
3445 fprintf (dump_file, ")\n");
3448 /* Returns true when all the access functions of A are affine or
3449 constant with respect to LOOP_NEST. */
3452 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3453 const struct loop *loop_nest)
3456 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3459 FOR_EACH_VEC_ELT (tree, fns, i, t)
3460 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3461 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3467 /* Initializes an equation for an OMEGA problem using the information
3468 contained in the ACCESS_FUN. Returns true when the operation
3471 PB is the omega constraint system.
3472 EQ is the number of the equation to be initialized.
3473 OFFSET is used for shifting the variables names in the constraints:
3474 a constrain is composed of 2 * the number of variables surrounding
3475 dependence accesses. OFFSET is set either to 0 for the first n variables,
3476 then it is set to n.
3477 ACCESS_FUN is expected to be an affine chrec. */
3480 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3481 unsigned int offset, tree access_fun,
3482 struct data_dependence_relation *ddr)
3484 switch (TREE_CODE (access_fun))
3486 case POLYNOMIAL_CHREC:
3488 tree left = CHREC_LEFT (access_fun);
3489 tree right = CHREC_RIGHT (access_fun);
3490 int var = CHREC_VARIABLE (access_fun);
3493 if (TREE_CODE (right) != INTEGER_CST)
3496 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3497 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3499 /* Compute the innermost loop index. */
3500 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3503 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3504 += int_cst_value (right);
3506 switch (TREE_CODE (left))
3508 case POLYNOMIAL_CHREC:
3509 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3512 pb->eqs[eq].coef[0] += int_cst_value (left);
3521 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3529 /* As explained in the comments preceding init_omega_for_ddr, we have
3530 to set up a system for each loop level, setting outer loops
3531 variation to zero, and current loop variation to positive or zero.
3532 Save each lexico positive distance vector. */
3535 omega_extract_distance_vectors (omega_pb pb,
3536 struct data_dependence_relation *ddr)
3540 struct loop *loopi, *loopj;
3541 enum omega_result res;
3543 /* Set a new problem for each loop in the nest. The basis is the
3544 problem that we have initialized until now. On top of this we
3545 add new constraints. */
3546 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3547 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3550 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3551 DDR_NB_LOOPS (ddr));
3553 omega_copy_problem (copy, pb);
3555 /* For all the outer loops "loop_j", add "dj = 0". */
3557 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3559 eq = omega_add_zero_eq (copy, omega_black);
3560 copy->eqs[eq].coef[j + 1] = 1;
3563 /* For "loop_i", add "0 <= di". */
3564 geq = omega_add_zero_geq (copy, omega_black);
3565 copy->geqs[geq].coef[i + 1] = 1;
3567 /* Reduce the constraint system, and test that the current
3568 problem is feasible. */
3569 res = omega_simplify_problem (copy);
3570 if (res == omega_false
3571 || res == omega_unknown
3572 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3575 for (eq = 0; eq < copy->num_subs; eq++)
3576 if (copy->subs[eq].key == (int) i + 1)
3578 dist = copy->subs[eq].coef[0];
3584 /* Reinitialize problem... */
3585 omega_copy_problem (copy, pb);
3587 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3589 eq = omega_add_zero_eq (copy, omega_black);
3590 copy->eqs[eq].coef[j + 1] = 1;
3593 /* ..., but this time "di = 1". */
3594 eq = omega_add_zero_eq (copy, omega_black);
3595 copy->eqs[eq].coef[i + 1] = 1;
3596 copy->eqs[eq].coef[0] = -1;
3598 res = omega_simplify_problem (copy);
3599 if (res == omega_false
3600 || res == omega_unknown
3601 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3604 for (eq = 0; eq < copy->num_subs; eq++)
3605 if (copy->subs[eq].key == (int) i + 1)
3607 dist = copy->subs[eq].coef[0];
3613 /* Save the lexicographically positive distance vector. */
3616 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3617 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3621 for (eq = 0; eq < copy->num_subs; eq++)
3622 if (copy->subs[eq].key > 0)
3624 dist = copy->subs[eq].coef[0];
3625 dist_v[copy->subs[eq].key - 1] = dist;
3628 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3629 dir_v[j] = dir_from_dist (dist_v[j]);
3631 save_dist_v (ddr, dist_v);
3632 save_dir_v (ddr, dir_v);
3636 omega_free_problem (copy);
3640 /* This is called for each subscript of a tuple of data references:
3641 insert an equality for representing the conflicts. */
3644 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3645 struct data_dependence_relation *ddr,
3646 omega_pb pb, bool *maybe_dependent)
3649 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3650 TREE_TYPE (access_fun_b));
3651 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3652 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3653 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3656 /* When the fun_a - fun_b is not constant, the dependence is not
3657 captured by the classic distance vector representation. */
3658 if (TREE_CODE (difference) != INTEGER_CST)
3662 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3664 /* There is no dependence. */
3665 *maybe_dependent = false;
3669 minus_one = build_int_cst (type, -1);
3670 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3672 eq = omega_add_zero_eq (pb, omega_black);
3673 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3674 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3675 /* There is probably a dependence, but the system of
3676 constraints cannot be built: answer "don't know". */
3680 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3681 && !int_divides_p (lambda_vector_gcd
3682 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3683 2 * DDR_NB_LOOPS (ddr)),
3684 pb->eqs[eq].coef[0]))
3686 /* There is no dependence. */
3687 *maybe_dependent = false;
3694 /* Helper function, same as init_omega_for_ddr but specialized for
3695 data references A and B. */
3698 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3699 struct data_dependence_relation *ddr,
3700 omega_pb pb, bool *maybe_dependent)
3705 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3707 /* Insert an equality per subscript. */
3708 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3710 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3711 ddr, pb, maybe_dependent))
3713 else if (*maybe_dependent == false)
3715 /* There is no dependence. */
3716 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3721 /* Insert inequalities: constraints corresponding to the iteration
3722 domain, i.e. the loops surrounding the references "loop_x" and
3723 the distance variables "dx". The layout of the OMEGA
3724 representation is as follows:
3725 - coef[0] is the constant
3726 - coef[1..nb_loops] are the protected variables that will not be
3727 removed by the solver: the "dx"
3728 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3730 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3731 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3733 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3736 ineq = omega_add_zero_geq (pb, omega_black);
3737 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3739 /* 0 <= loop_x + dx */
3740 ineq = omega_add_zero_geq (pb, omega_black);
3741 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3742 pb->geqs[ineq].coef[i + 1] = 1;
3746 /* loop_x <= nb_iters */
3747 ineq = omega_add_zero_geq (pb, omega_black);
3748 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3749 pb->geqs[ineq].coef[0] = nbi;
3751 /* loop_x + dx <= nb_iters */
3752 ineq = omega_add_zero_geq (pb, omega_black);
3753 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3754 pb->geqs[ineq].coef[i + 1] = -1;
3755 pb->geqs[ineq].coef[0] = nbi;
3757 /* A step "dx" bigger than nb_iters is not feasible, so
3758 add "0 <= nb_iters + dx", */
3759 ineq = omega_add_zero_geq (pb, omega_black);
3760 pb->geqs[ineq].coef[i + 1] = 1;
3761 pb->geqs[ineq].coef[0] = nbi;
3762 /* and "dx <= nb_iters". */
3763 ineq = omega_add_zero_geq (pb, omega_black);
3764 pb->geqs[ineq].coef[i + 1] = -1;
3765 pb->geqs[ineq].coef[0] = nbi;
3769 omega_extract_distance_vectors (pb, ddr);
3774 /* Sets up the Omega dependence problem for the data dependence
3775 relation DDR. Returns false when the constraint system cannot be
3776 built, ie. when the test answers "don't know". Returns true
3777 otherwise, and when independence has been proved (using one of the
3778 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3779 set MAYBE_DEPENDENT to true.
3781 Example: for setting up the dependence system corresponding to the
3782 conflicting accesses
3787 | ... A[2*j, 2*(i + j)]
3791 the following constraints come from the iteration domain:
3798 where di, dj are the distance variables. The constraints
3799 representing the conflicting elements are:
3802 i + 1 = 2 * (i + di + j + dj)
3804 For asking that the resulting distance vector (di, dj) be
3805 lexicographically positive, we insert the constraint "di >= 0". If
3806 "di = 0" in the solution, we fix that component to zero, and we
3807 look at the inner loops: we set a new problem where all the outer
3808 loop distances are zero, and fix this inner component to be
3809 positive. When one of the components is positive, we save that
3810 distance, and set a new problem where the distance on this loop is
3811 zero, searching for other distances in the inner loops. Here is
3812 the classic example that illustrates that we have to set for each
3813 inner loop a new problem:
3821 we have to save two distances (1, 0) and (0, 1).
3823 Given two array references, refA and refB, we have to set the
3824 dependence problem twice, refA vs. refB and refB vs. refA, and we
3825 cannot do a single test, as refB might occur before refA in the
3826 inner loops, and the contrary when considering outer loops: ex.
3831 | T[{1,+,1}_2][{1,+,1}_1] // refA
3832 | T[{2,+,1}_2][{0,+,1}_1] // refB
3837 refB touches the elements in T before refA, and thus for the same
3838 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3839 but for successive loop_0 iterations, we have (1, -1, 1)
3841 The Omega solver expects the distance variables ("di" in the
3842 previous example) to come first in the constraint system (as
3843 variables to be protected, or "safe" variables), the constraint
3844 system is built using the following layout:
3846 "cst | distance vars | index vars".
3850 init_omega_for_ddr (struct data_dependence_relation *ddr,
3851 bool *maybe_dependent)
3856 *maybe_dependent = true;
3858 if (same_access_functions (ddr))
3861 lambda_vector dir_v;
3863 /* Save the 0 vector. */
3864 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3865 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3866 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3867 dir_v[j] = dir_equal;
3868 save_dir_v (ddr, dir_v);
3870 /* Save the dependences carried by outer loops. */
3871 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3872 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3874 omega_free_problem (pb);
3878 /* Omega expects the protected variables (those that have to be kept
3879 after elimination) to appear first in the constraint system.
3880 These variables are the distance variables. In the following
3881 initialization we declare NB_LOOPS safe variables, and the total
3882 number of variables for the constraint system is 2*NB_LOOPS. */
3883 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3884 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3886 omega_free_problem (pb);
3888 /* Stop computation if not decidable, or no dependence. */
3889 if (res == false || *maybe_dependent == false)
3892 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3893 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3895 omega_free_problem (pb);
3900 /* Return true when DDR contains the same information as that stored
3901 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3904 ddr_consistent_p (FILE *file,
3905 struct data_dependence_relation *ddr,
3906 VEC (lambda_vector, heap) *dist_vects,
3907 VEC (lambda_vector, heap) *dir_vects)
3911 /* If dump_file is set, output there. */
3912 if (dump_file && (dump_flags & TDF_DETAILS))
3915 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3917 lambda_vector b_dist_v;
3918 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3919 VEC_length (lambda_vector, dist_vects),
3920 DDR_NUM_DIST_VECTS (ddr));
3922 fprintf (file, "Banerjee dist vectors:\n");
3923 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3924 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3926 fprintf (file, "Omega dist vectors:\n");
3927 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3928 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3930 fprintf (file, "data dependence relation:\n");
3931 dump_data_dependence_relation (file, ddr);
3933 fprintf (file, ")\n");
3937 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3939 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3940 VEC_length (lambda_vector, dir_vects),
3941 DDR_NUM_DIR_VECTS (ddr));
3945 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3947 lambda_vector a_dist_v;
3948 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3950 /* Distance vectors are not ordered in the same way in the DDR
3951 and in the DIST_VECTS: search for a matching vector. */
3952 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3953 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3956 if (j == VEC_length (lambda_vector, dist_vects))
3958 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3959 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3960 fprintf (file, "not found in Omega dist vectors:\n");
3961 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3962 fprintf (file, "data dependence relation:\n");
3963 dump_data_dependence_relation (file, ddr);
3964 fprintf (file, ")\n");
3968 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3970 lambda_vector a_dir_v;
3971 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3973 /* Direction vectors are not ordered in the same way in the DDR
3974 and in the DIR_VECTS: search for a matching vector. */
3975 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3976 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3979 if (j == VEC_length (lambda_vector, dist_vects))
3981 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3982 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3983 fprintf (file, "not found in Omega dir vectors:\n");
3984 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3985 fprintf (file, "data dependence relation:\n");
3986 dump_data_dependence_relation (file, ddr);
3987 fprintf (file, ")\n");
3994 /* This computes the affine dependence relation between A and B with
3995 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3996 independence between two accesses, while CHREC_DONT_KNOW is used
3997 for representing the unknown relation.
3999 Note that it is possible to stop the computation of the dependence
4000 relation the first time we detect a CHREC_KNOWN element for a given
4004 compute_affine_dependence (struct data_dependence_relation *ddr,
4005 struct loop *loop_nest)
4007 struct data_reference *dra = DDR_A (ddr);
4008 struct data_reference *drb = DDR_B (ddr);
4010 if (dump_file && (dump_flags & TDF_DETAILS))
4012 fprintf (dump_file, "(compute_affine_dependence\n");
4013 fprintf (dump_file, " (stmt_a = \n");
4014 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
4015 fprintf (dump_file, ")\n (stmt_b = \n");
4016 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
4017 fprintf (dump_file, ")\n");
4020 /* Analyze only when the dependence relation is not yet known. */
4021 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
4022 && !DDR_SELF_REFERENCE (ddr))
4024 dependence_stats.num_dependence_tests++;
4026 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4027 && access_functions_are_affine_or_constant_p (drb, loop_nest))
4029 if (flag_check_data_deps)
4031 /* Compute the dependences using the first algorithm. */
4032 subscript_dependence_tester (ddr, loop_nest);
4034 if (dump_file && (dump_flags & TDF_DETAILS))
4036 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4037 dump_data_dependence_relation (dump_file, ddr);
4040 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4042 bool maybe_dependent;
4043 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4045 /* Save the result of the first DD analyzer. */
4046 dist_vects = DDR_DIST_VECTS (ddr);
4047 dir_vects = DDR_DIR_VECTS (ddr);
4049 /* Reset the information. */
4050 DDR_DIST_VECTS (ddr) = NULL;
4051 DDR_DIR_VECTS (ddr) = NULL;
4053 /* Compute the same information using Omega. */
4054 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4055 goto csys_dont_know;
4057 if (dump_file && (dump_flags & TDF_DETAILS))
4059 fprintf (dump_file, "Omega Analyzer\n");
4060 dump_data_dependence_relation (dump_file, ddr);
4063 /* Check that we get the same information. */
4064 if (maybe_dependent)
4065 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4070 subscript_dependence_tester (ddr, loop_nest);
4073 /* As a last case, if the dependence cannot be determined, or if
4074 the dependence is considered too difficult to determine, answer
4079 dependence_stats.num_dependence_undetermined++;
4081 if (dump_file && (dump_flags & TDF_DETAILS))
4083 fprintf (dump_file, "Data ref a:\n");
4084 dump_data_reference (dump_file, dra);
4085 fprintf (dump_file, "Data ref b:\n");
4086 dump_data_reference (dump_file, drb);
4087 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4089 finalize_ddr_dependent (ddr, chrec_dont_know);
4093 if (dump_file && (dump_flags & TDF_DETAILS))
4094 fprintf (dump_file, ")\n");
4097 /* This computes the dependence relation for the same data
4098 reference into DDR. */
4101 compute_self_dependence (struct data_dependence_relation *ddr)
4104 struct subscript *subscript;
4106 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4109 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4112 if (SUB_CONFLICTS_IN_A (subscript))
4113 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4114 if (SUB_CONFLICTS_IN_B (subscript))
4115 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4117 /* The accessed index overlaps for each iteration. */
4118 SUB_CONFLICTS_IN_A (subscript)
4119 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4120 SUB_CONFLICTS_IN_B (subscript)
4121 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4122 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4125 /* The distance vector is the zero vector. */
4126 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4127 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4130 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4131 the data references in DATAREFS, in the LOOP_NEST. When
4132 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4136 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4137 VEC (ddr_p, heap) **dependence_relations,
4138 VEC (loop_p, heap) *loop_nest,
4139 bool compute_self_and_rr)
4141 struct data_dependence_relation *ddr;
4142 struct data_reference *a, *b;
4145 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4146 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4147 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4149 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4150 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4152 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4155 if (compute_self_and_rr)
4156 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4158 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4159 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4160 compute_self_dependence (ddr);
4164 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4165 true if STMT clobbers memory, false otherwise. */
4168 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4170 bool clobbers_memory = false;
4173 enum gimple_code stmt_code = gimple_code (stmt);
4177 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4178 Calls have side-effects, except those to const or pure
4180 if ((stmt_code == GIMPLE_CALL
4181 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4182 || (stmt_code == GIMPLE_ASM
4183 && gimple_asm_volatile_p (stmt)))
4184 clobbers_memory = true;
4186 if (!gimple_vuse (stmt))
4187 return clobbers_memory;
4189 if (stmt_code == GIMPLE_ASSIGN)
4192 op0 = gimple_assign_lhs_ptr (stmt);
4193 op1 = gimple_assign_rhs1_ptr (stmt);
4196 || (REFERENCE_CLASS_P (*op1)
4197 && (base = get_base_address (*op1))
4198 && TREE_CODE (base) != SSA_NAME))
4200 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4202 ref->is_read = true;
4206 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4208 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4210 ref->is_read = false;
4213 else if (stmt_code == GIMPLE_CALL)
4215 unsigned i, n = gimple_call_num_args (stmt);
4217 for (i = 0; i < n; i++)
4219 op0 = gimple_call_arg_ptr (stmt, i);
4222 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4224 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4226 ref->is_read = true;
4231 return clobbers_memory;
4234 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4235 reference, returns false, otherwise returns true. NEST is the outermost
4236 loop of the loop nest in which the references should be analyzed. */
4239 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4240 VEC (data_reference_p, heap) **datarefs)
4243 VEC (data_ref_loc, heap) *references;
4246 data_reference_p dr;
4248 if (get_references_in_stmt (stmt, &references))
4250 VEC_free (data_ref_loc, heap, references);
4254 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4256 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4257 *ref->pos, stmt, ref->is_read);
4258 gcc_assert (dr != NULL);
4260 /* FIXME -- data dependence analysis does not work correctly for objects
4261 with invariant addresses in loop nests. Let us fail here until the
4262 problem is fixed. */
4263 if (dr_address_invariant_p (dr) && nest)
4266 if (dump_file && (dump_flags & TDF_DETAILS))
4267 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4272 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4274 VEC_free (data_ref_loc, heap, references);
4278 /* Stores the data references in STMT to DATAREFS. If there is an
4279 unanalyzable reference, returns false, otherwise returns true.
4280 NEST is the outermost loop of the loop nest in which the references
4281 should be instantiated, LOOP is the loop in which the references
4282 should be analyzed. */
4285 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4286 VEC (data_reference_p, heap) **datarefs)
4289 VEC (data_ref_loc, heap) *references;
4292 data_reference_p dr;
4294 if (get_references_in_stmt (stmt, &references))
4296 VEC_free (data_ref_loc, heap, references);
4300 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4302 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4303 gcc_assert (dr != NULL);
4304 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4307 VEC_free (data_ref_loc, heap, references);
4311 /* Search the data references in LOOP, and record the information into
4312 DATAREFS. Returns chrec_dont_know when failing to analyze a
4313 difficult case, returns NULL_TREE otherwise. */
4316 find_data_references_in_bb (struct loop *loop, basic_block bb,
4317 VEC (data_reference_p, heap) **datarefs)
4319 gimple_stmt_iterator bsi;
4321 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4323 gimple stmt = gsi_stmt (bsi);
4325 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4327 struct data_reference *res;
4328 res = XCNEW (struct data_reference);
4329 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4331 return chrec_dont_know;
4338 /* Search the data references in LOOP, and record the information into
4339 DATAREFS. Returns chrec_dont_know when failing to analyze a
4340 difficult case, returns NULL_TREE otherwise.
4342 TODO: This function should be made smarter so that it can handle address
4343 arithmetic as if they were array accesses, etc. */
4346 find_data_references_in_loop (struct loop *loop,
4347 VEC (data_reference_p, heap) **datarefs)
4349 basic_block bb, *bbs;
4352 bbs = get_loop_body_in_dom_order (loop);
4354 for (i = 0; i < loop->num_nodes; i++)
4358 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4361 return chrec_dont_know;
4369 /* Recursive helper function. */
4372 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4374 /* Inner loops of the nest should not contain siblings. Example:
4375 when there are two consecutive loops,
4386 the dependence relation cannot be captured by the distance
4391 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4393 return find_loop_nest_1 (loop->inner, loop_nest);
4397 /* Return false when the LOOP is not well nested. Otherwise return
4398 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4399 contain the loops from the outermost to the innermost, as they will
4400 appear in the classic distance vector. */
4403 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4405 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4407 return find_loop_nest_1 (loop->inner, loop_nest);
4411 /* Returns true when the data dependences have been computed, false otherwise.
4412 Given a loop nest LOOP, the following vectors are returned:
4413 DATAREFS is initialized to all the array elements contained in this loop,
4414 DEPENDENCE_RELATIONS contains the relations between the data references.
4415 Compute read-read and self relations if
4416 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4419 compute_data_dependences_for_loop (struct loop *loop,
4420 bool compute_self_and_read_read_dependences,
4421 VEC (loop_p, heap) **loop_nest,
4422 VEC (data_reference_p, heap) **datarefs,
4423 VEC (ddr_p, heap) **dependence_relations)
4427 memset (&dependence_stats, 0, sizeof (dependence_stats));
4429 /* If the loop nest is not well formed, or one of the data references
4430 is not computable, give up without spending time to compute other
4433 || !find_loop_nest (loop, loop_nest)
4434 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4436 struct data_dependence_relation *ddr;
4438 /* Insert a single relation into dependence_relations:
4440 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4441 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4445 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4446 compute_self_and_read_read_dependences);
4448 if (dump_file && (dump_flags & TDF_STATS))
4450 fprintf (dump_file, "Dependence tester statistics:\n");
4452 fprintf (dump_file, "Number of dependence tests: %d\n",
4453 dependence_stats.num_dependence_tests);
4454 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4455 dependence_stats.num_dependence_dependent);
4456 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4457 dependence_stats.num_dependence_independent);
4458 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4459 dependence_stats.num_dependence_undetermined);
4461 fprintf (dump_file, "Number of subscript tests: %d\n",
4462 dependence_stats.num_subscript_tests);
4463 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4464 dependence_stats.num_subscript_undetermined);
4465 fprintf (dump_file, "Number of same subscript function: %d\n",
4466 dependence_stats.num_same_subscript_function);
4468 fprintf (dump_file, "Number of ziv tests: %d\n",
4469 dependence_stats.num_ziv);
4470 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4471 dependence_stats.num_ziv_dependent);
4472 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4473 dependence_stats.num_ziv_independent);
4474 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4475 dependence_stats.num_ziv_unimplemented);
4477 fprintf (dump_file, "Number of siv tests: %d\n",
4478 dependence_stats.num_siv);
4479 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4480 dependence_stats.num_siv_dependent);
4481 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4482 dependence_stats.num_siv_independent);
4483 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4484 dependence_stats.num_siv_unimplemented);
4486 fprintf (dump_file, "Number of miv tests: %d\n",
4487 dependence_stats.num_miv);
4488 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4489 dependence_stats.num_miv_dependent);
4490 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4491 dependence_stats.num_miv_independent);
4492 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4493 dependence_stats.num_miv_unimplemented);
4499 /* Returns true when the data dependences for the basic block BB have been
4500 computed, false otherwise.
4501 DATAREFS is initialized to all the array elements contained in this basic
4502 block, DEPENDENCE_RELATIONS contains the relations between the data
4503 references. Compute read-read and self relations if
4504 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4506 compute_data_dependences_for_bb (basic_block bb,
4507 bool compute_self_and_read_read_dependences,
4508 VEC (data_reference_p, heap) **datarefs,
4509 VEC (ddr_p, heap) **dependence_relations)
4511 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4514 compute_all_dependences (*datarefs, dependence_relations, NULL,
4515 compute_self_and_read_read_dependences);
4519 /* Entry point (for testing only). Analyze all the data references
4520 and the dependence relations in LOOP.
4522 The data references are computed first.
4524 A relation on these nodes is represented by a complete graph. Some
4525 of the relations could be of no interest, thus the relations can be
4528 In the following function we compute all the relations. This is
4529 just a first implementation that is here for:
4530 - for showing how to ask for the dependence relations,
4531 - for the debugging the whole dependence graph,
4532 - for the dejagnu testcases and maintenance.
4534 It is possible to ask only for a part of the graph, avoiding to
4535 compute the whole dependence graph. The computed dependences are
4536 stored in a knowledge base (KB) such that later queries don't
4537 recompute the same information. The implementation of this KB is
4538 transparent to the optimizer, and thus the KB can be changed with a
4539 more efficient implementation, or the KB could be disabled. */
4541 analyze_all_data_dependences (struct loop *loop)
4544 int nb_data_refs = 10;
4545 VEC (data_reference_p, heap) *datarefs =
4546 VEC_alloc (data_reference_p, heap, nb_data_refs);
4547 VEC (ddr_p, heap) *dependence_relations =
4548 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4549 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4551 /* Compute DDs on the whole function. */
4552 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4553 &dependence_relations);
4557 dump_data_dependence_relations (dump_file, dependence_relations);
4558 fprintf (dump_file, "\n\n");
4560 if (dump_flags & TDF_DETAILS)
4561 dump_dist_dir_vectors (dump_file, dependence_relations);
4563 if (dump_flags & TDF_STATS)
4565 unsigned nb_top_relations = 0;
4566 unsigned nb_bot_relations = 0;
4567 unsigned nb_chrec_relations = 0;
4568 struct data_dependence_relation *ddr;
4570 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4572 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4575 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4579 nb_chrec_relations++;
4582 gather_stats_on_scev_database ();
4586 VEC_free (loop_p, heap, loop_nest);
4587 free_dependence_relations (dependence_relations);
4588 free_data_refs (datarefs);
4591 /* Computes all the data dependences and check that the results of
4592 several analyzers are the same. */
4595 tree_check_data_deps (void)
4598 struct loop *loop_nest;
4600 FOR_EACH_LOOP (li, loop_nest, 0)
4601 analyze_all_data_dependences (loop_nest);
4604 /* Free the memory used by a data dependence relation DDR. */
4607 free_dependence_relation (struct data_dependence_relation *ddr)
4612 if (DDR_SUBSCRIPTS (ddr))
4613 free_subscripts (DDR_SUBSCRIPTS (ddr));
4614 if (DDR_DIST_VECTS (ddr))
4615 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4616 if (DDR_DIR_VECTS (ddr))
4617 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4622 /* Free the memory used by the data dependence relations from
4623 DEPENDENCE_RELATIONS. */
4626 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4629 struct data_dependence_relation *ddr;
4631 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4633 free_dependence_relation (ddr);
4635 VEC_free (ddr_p, heap, dependence_relations);
4638 /* Free the memory used by the data references from DATAREFS. */
4641 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4644 struct data_reference *dr;
4646 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4648 VEC_free (data_reference_p, heap, datarefs);
4653 /* Dump vertex I in RDG to FILE. */
4656 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4658 struct vertex *v = &(rdg->vertices[i]);
4659 struct graph_edge *e;
4661 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4662 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4663 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4666 for (e = v->pred; e; e = e->pred_next)
4667 fprintf (file, " %d", e->src);
4669 fprintf (file, ") (out:");
4672 for (e = v->succ; e; e = e->succ_next)
4673 fprintf (file, " %d", e->dest);
4675 fprintf (file, ")\n");
4676 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4677 fprintf (file, ")\n");
4680 /* Call dump_rdg_vertex on stderr. */
4683 debug_rdg_vertex (struct graph *rdg, int i)
4685 dump_rdg_vertex (stderr, rdg, i);
4688 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4689 dumped vertices to that bitmap. */
4691 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4695 fprintf (file, "(%d\n", c);
4697 for (i = 0; i < rdg->n_vertices; i++)
4698 if (rdg->vertices[i].component == c)
4701 bitmap_set_bit (dumped, i);
4703 dump_rdg_vertex (file, rdg, i);
4706 fprintf (file, ")\n");
4709 /* Call dump_rdg_vertex on stderr. */
4712 debug_rdg_component (struct graph *rdg, int c)
4714 dump_rdg_component (stderr, rdg, c, NULL);
4717 /* Dump the reduced dependence graph RDG to FILE. */
4720 dump_rdg (FILE *file, struct graph *rdg)
4723 bitmap dumped = BITMAP_ALLOC (NULL);
4725 fprintf (file, "(rdg\n");
4727 for (i = 0; i < rdg->n_vertices; i++)
4728 if (!bitmap_bit_p (dumped, i))
4729 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4731 fprintf (file, ")\n");
4732 BITMAP_FREE (dumped);
4735 /* Call dump_rdg on stderr. */
4738 debug_rdg (struct graph *rdg)
4740 dump_rdg (stderr, rdg);
4744 dot_rdg_1 (FILE *file, struct graph *rdg)
4748 fprintf (file, "digraph RDG {\n");
4750 for (i = 0; i < rdg->n_vertices; i++)
4752 struct vertex *v = &(rdg->vertices[i]);
4753 struct graph_edge *e;
4755 /* Highlight reads from memory. */
4756 if (RDG_MEM_READS_STMT (rdg, i))
4757 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4759 /* Highlight stores to memory. */
4760 if (RDG_MEM_WRITE_STMT (rdg, i))
4761 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4764 for (e = v->succ; e; e = e->succ_next)
4765 switch (RDGE_TYPE (e))
4768 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4772 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4776 /* These are the most common dependences: don't print these. */
4777 fprintf (file, "%d -> %d \n", i, e->dest);
4781 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4789 fprintf (file, "}\n\n");
4792 /* Display the Reduced Dependence Graph using dotty. */
4793 extern void dot_rdg (struct graph *);
4796 dot_rdg (struct graph *rdg)
4798 /* When debugging, enable the following code. This cannot be used
4799 in production compilers because it calls "system". */
4801 FILE *file = fopen ("/tmp/rdg.dot", "w");
4802 gcc_assert (file != NULL);
4804 dot_rdg_1 (file, rdg);
4807 system ("dotty /tmp/rdg.dot &");
4809 dot_rdg_1 (stderr, rdg);
4813 /* This structure is used for recording the mapping statement index in
4816 struct GTY(()) rdg_vertex_info
4822 /* Returns the index of STMT in RDG. */
4825 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4827 struct rdg_vertex_info rvi, *slot;
4830 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4838 /* Creates an edge in RDG for each distance vector from DDR. The
4839 order that we keep track of in the RDG is the order in which
4840 statements have to be executed. */
4843 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4845 struct graph_edge *e;
4847 data_reference_p dra = DDR_A (ddr);
4848 data_reference_p drb = DDR_B (ddr);
4849 unsigned level = ddr_dependence_level (ddr);
4851 /* For non scalar dependences, when the dependence is REVERSED,
4852 statement B has to be executed before statement A. */
4854 && !DDR_REVERSED_P (ddr))
4856 data_reference_p tmp = dra;
4861 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4862 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4864 if (va < 0 || vb < 0)
4867 e = add_edge (rdg, va, vb);
4868 e->data = XNEW (struct rdg_edge);
4870 RDGE_LEVEL (e) = level;
4871 RDGE_RELATION (e) = ddr;
4873 /* Determines the type of the data dependence. */
4874 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4875 RDGE_TYPE (e) = input_dd;
4876 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4877 RDGE_TYPE (e) = output_dd;
4878 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4879 RDGE_TYPE (e) = flow_dd;
4880 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4881 RDGE_TYPE (e) = anti_dd;
4884 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4885 the index of DEF in RDG. */
4888 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4890 use_operand_p imm_use_p;
4891 imm_use_iterator iterator;
4893 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4895 struct graph_edge *e;
4896 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4901 e = add_edge (rdg, idef, use);
4902 e->data = XNEW (struct rdg_edge);
4903 RDGE_TYPE (e) = flow_dd;
4904 RDGE_RELATION (e) = NULL;
4908 /* Creates the edges of the reduced dependence graph RDG. */
4911 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4914 struct data_dependence_relation *ddr;
4915 def_operand_p def_p;
4918 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4919 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4920 create_rdg_edge_for_ddr (rdg, ddr);
4922 for (i = 0; i < rdg->n_vertices; i++)
4923 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4925 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4928 /* Build the vertices of the reduced dependence graph RDG. */
4931 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4936 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4938 VEC (data_ref_loc, heap) *references;
4940 struct vertex *v = &(rdg->vertices[i]);
4941 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4942 struct rdg_vertex_info **slot;
4946 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4953 v->data = XNEW (struct rdg_vertex);
4954 RDG_STMT (rdg, i) = stmt;
4956 RDG_MEM_WRITE_STMT (rdg, i) = false;
4957 RDG_MEM_READS_STMT (rdg, i) = false;
4958 if (gimple_code (stmt) == GIMPLE_PHI)
4961 get_references_in_stmt (stmt, &references);
4962 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4964 RDG_MEM_WRITE_STMT (rdg, i) = true;
4966 RDG_MEM_READS_STMT (rdg, i) = true;
4968 VEC_free (data_ref_loc, heap, references);
4972 /* Initialize STMTS with all the statements of LOOP. When
4973 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4974 which we discover statements is important as
4975 generate_loops_for_partition is using the same traversal for
4976 identifying statements. */
4979 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4982 basic_block *bbs = get_loop_body_in_dom_order (loop);
4984 for (i = 0; i < loop->num_nodes; i++)
4986 basic_block bb = bbs[i];
4987 gimple_stmt_iterator bsi;
4990 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4991 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4993 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4995 stmt = gsi_stmt (bsi);
4996 if (gimple_code (stmt) != GIMPLE_LABEL)
4997 VEC_safe_push (gimple, heap, *stmts, stmt);
5004 /* Returns true when all the dependences are computable. */
5007 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5012 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5013 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5019 /* Computes a hash function for element ELT. */
5022 hash_stmt_vertex_info (const void *elt)
5024 const struct rdg_vertex_info *const rvi =
5025 (const struct rdg_vertex_info *) elt;
5026 gimple stmt = rvi->stmt;
5028 return htab_hash_pointer (stmt);
5031 /* Compares database elements E1 and E2. */
5034 eq_stmt_vertex_info (const void *e1, const void *e2)
5036 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5037 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5039 return elt1->stmt == elt2->stmt;
5042 /* Free the element E. */
5045 hash_stmt_vertex_del (void *e)
5050 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5051 statement of the loop nest, and one edge per data dependence or
5052 scalar dependence. */
5055 build_empty_rdg (int n_stmts)
5057 int nb_data_refs = 10;
5058 struct graph *rdg = new_graph (n_stmts);
5060 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5061 eq_stmt_vertex_info, hash_stmt_vertex_del);
5065 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5066 statement of the loop nest, and one edge per data dependence or
5067 scalar dependence. */
5070 build_rdg (struct loop *loop,
5071 VEC (loop_p, heap) **loop_nest,
5072 VEC (ddr_p, heap) **dependence_relations,
5073 VEC (data_reference_p, heap) **datarefs)
5075 struct graph *rdg = NULL;
5076 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5078 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5079 dependence_relations);
5081 if (known_dependences_p (*dependence_relations))
5083 stmts_from_loop (loop, &stmts);
5084 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5085 create_rdg_vertices (rdg, stmts);
5086 create_rdg_edges (rdg, *dependence_relations);
5089 VEC_free (gimple, heap, stmts);
5093 /* Free the reduced dependence graph RDG. */
5096 free_rdg (struct graph *rdg)
5100 for (i = 0; i < rdg->n_vertices; i++)
5102 struct vertex *v = &(rdg->vertices[i]);
5103 struct graph_edge *e;
5105 for (e = v->succ; e; e = e->succ_next)
5113 htab_delete (rdg->indices);
5117 /* Initialize STMTS with all the statements of LOOP that contain a
5121 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5124 basic_block *bbs = get_loop_body_in_dom_order (loop);
5126 for (i = 0; i < loop->num_nodes; i++)
5128 basic_block bb = bbs[i];
5129 gimple_stmt_iterator bsi;
5131 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5132 if (gimple_vdef (gsi_stmt (bsi)))
5133 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5139 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5140 that contains a data reference on its LHS with a stride of the same
5141 size as its unit type. */
5144 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5148 struct data_reference *dr;
5151 || !gimple_vdef (stmt)
5152 || !is_gimple_assign (stmt)
5153 || !gimple_assign_single_p (stmt)
5154 || !(op1 = gimple_assign_rhs1 (stmt))
5155 || !(integer_zerop (op1) || real_zerop (op1)))
5158 dr = XCNEW (struct data_reference);
5159 op0 = gimple_assign_lhs (stmt);
5161 DR_STMT (dr) = stmt;
5164 res = dr_analyze_innermost (dr)
5165 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5171 /* Initialize STMTS with all the statements of LOOP that contain a
5172 store to memory of the form "A[i] = 0". */
5175 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5179 gimple_stmt_iterator si;
5181 basic_block *bbs = get_loop_body_in_dom_order (loop);
5183 for (i = 0; i < loop->num_nodes; i++)
5184 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5185 if ((stmt = gsi_stmt (si))
5186 && stmt_with_adjacent_zero_store_dr_p (stmt))
5187 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5192 /* For a data reference REF, return the declaration of its base
5193 address or NULL_TREE if the base is not determined. */
5196 ref_base_address (gimple stmt, data_ref_loc *ref)
5198 tree base = NULL_TREE;
5200 struct data_reference *dr = XCNEW (struct data_reference);
5202 DR_STMT (dr) = stmt;
5203 DR_REF (dr) = *ref->pos;
5204 dr_analyze_innermost (dr);
5205 base_address = DR_BASE_ADDRESS (dr);
5210 switch (TREE_CODE (base_address))
5213 base = TREE_OPERAND (base_address, 0);
5217 base = base_address;
5226 /* Determines whether the statement from vertex V of the RDG has a
5227 definition used outside the loop that contains this statement. */
5230 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5232 gimple stmt = RDG_STMT (rdg, v);
5233 struct loop *loop = loop_containing_stmt (stmt);
5234 use_operand_p imm_use_p;
5235 imm_use_iterator iterator;
5237 def_operand_p def_p;
5242 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5244 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5246 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5254 /* Determines whether statements S1 and S2 access to similar memory
5255 locations. Two memory accesses are considered similar when they
5256 have the same base address declaration, i.e. when their
5257 ref_base_address is the same. */
5260 have_similar_memory_accesses (gimple s1, gimple s2)
5264 VEC (data_ref_loc, heap) *refs1, *refs2;
5265 data_ref_loc *ref1, *ref2;
5267 get_references_in_stmt (s1, &refs1);
5268 get_references_in_stmt (s2, &refs2);
5270 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5272 tree base1 = ref_base_address (s1, ref1);
5275 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5276 if (base1 == ref_base_address (s2, ref2))
5284 VEC_free (data_ref_loc, heap, refs1);
5285 VEC_free (data_ref_loc, heap, refs2);
5289 /* Helper function for the hashtab. */
5292 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5294 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5295 CONST_CAST_GIMPLE ((const_gimple) s2));
5298 /* Helper function for the hashtab. */
5301 ref_base_address_1 (const void *s)
5303 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5305 VEC (data_ref_loc, heap) *refs;
5309 get_references_in_stmt (stmt, &refs);
5311 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5314 res = htab_hash_pointer (ref_base_address (stmt, ref));
5318 VEC_free (data_ref_loc, heap, refs);
5322 /* Try to remove duplicated write data references from STMTS. */
5325 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5329 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5330 have_similar_memory_accesses_1, NULL);
5332 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5336 slot = htab_find_slot (seen, stmt, INSERT);
5339 VEC_ordered_remove (gimple, *stmts, i);
5342 *slot = (void *) stmt;
5350 /* Returns the index of PARAMETER in the parameters vector of the
5351 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5354 access_matrix_get_index_for_parameter (tree parameter,
5355 struct access_matrix *access_matrix)
5358 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5359 tree lambda_parameter;
5361 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5362 if (lambda_parameter == parameter)
5363 return i + AM_NB_INDUCTION_VARS (access_matrix);