1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
50 - to define an interface to access this data.
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
63 has an integer solution x = 1 and y = -1.
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
88 static struct datadep_stats
90 int num_dependence_tests;
91 int num_dependence_dependent;
92 int num_dependence_independent;
93 int num_dependence_undetermined;
95 int num_subscript_tests;
96 int num_subscript_undetermined;
97 int num_same_subscript_function;
100 int num_ziv_independent;
101 int num_ziv_dependent;
102 int num_ziv_unimplemented;
105 int num_siv_independent;
106 int num_siv_dependent;
107 int num_siv_unimplemented;
110 int num_miv_independent;
111 int num_miv_dependent;
112 int num_miv_unimplemented;
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
116 struct data_reference *,
117 struct data_reference *,
119 /* Returns true iff A divides B. */
122 tree_fold_divides_p (const_tree a, const_tree b)
124 gcc_assert (TREE_CODE (a) == INTEGER_CST);
125 gcc_assert (TREE_CODE (b) == INTEGER_CST);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
129 /* Returns true iff A divides B. */
132 int_divides_p (int a, int b)
134 return ((b % a) == 0);
139 /* Dump into FILE all the data references from DATAREFS. */
142 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
145 struct data_reference *dr;
147 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
148 dump_data_reference (file, dr);
151 /* Dump into STDERR all the data references from DATAREFS. */
154 debug_data_references (VEC (data_reference_p, heap) *datarefs)
156 dump_data_references (stderr, datarefs);
159 /* Dump to STDERR all the dependence relations from DDRS. */
162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 dump_data_dependence_relations (stderr, ddrs);
167 /* Dump into FILE all the dependence relations from DDRS. */
170 dump_data_dependence_relations (FILE *file,
171 VEC (ddr_p, heap) *ddrs)
174 struct data_dependence_relation *ddr;
176 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
177 dump_data_dependence_relation (file, ddr);
180 /* Print to STDERR the data_reference DR. */
183 debug_data_reference (struct data_reference *dr)
185 dump_data_reference (stderr, dr);
188 /* Dump function for a DATA_REFERENCE structure. */
191 dump_data_reference (FILE *outf,
192 struct data_reference *dr)
196 fprintf (outf, "#(Data Ref: \n");
197 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
198 fprintf (outf, "# stmt: ");
199 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
200 fprintf (outf, "# ref: ");
201 print_generic_stmt (outf, DR_REF (dr), 0);
202 fprintf (outf, "# base_object: ");
203 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
205 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
207 fprintf (outf, "# Access function %d: ", i);
208 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
210 fprintf (outf, "#)\n");
213 /* Dumps the affine function described by FN to the file OUTF. */
216 dump_affine_function (FILE *outf, affine_fn fn)
221 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
222 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
224 fprintf (outf, " + ");
225 print_generic_expr (outf, coef, TDF_SLIM);
226 fprintf (outf, " * x_%u", i);
230 /* Dumps the conflict function CF to the file OUTF. */
233 dump_conflict_function (FILE *outf, conflict_function *cf)
237 if (cf->n == NO_DEPENDENCE)
238 fprintf (outf, "no dependence\n");
239 else if (cf->n == NOT_KNOWN)
240 fprintf (outf, "not known\n");
243 for (i = 0; i < cf->n; i++)
246 dump_affine_function (outf, cf->fns[i]);
247 fprintf (outf, "]\n");
252 /* Dump function for a SUBSCRIPT structure. */
255 dump_subscript (FILE *outf, struct subscript *subscript)
257 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
259 fprintf (outf, "\n (subscript \n");
260 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
261 dump_conflict_function (outf, cf);
262 if (CF_NONTRIVIAL_P (cf))
264 tree last_iteration = SUB_LAST_CONFLICT (subscript);
265 fprintf (outf, " last_conflict: ");
266 print_generic_stmt (outf, last_iteration, 0);
269 cf = SUB_CONFLICTS_IN_B (subscript);
270 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
271 dump_conflict_function (outf, cf);
272 if (CF_NONTRIVIAL_P (cf))
274 tree last_iteration = SUB_LAST_CONFLICT (subscript);
275 fprintf (outf, " last_conflict: ");
276 print_generic_stmt (outf, last_iteration, 0);
279 fprintf (outf, " (Subscript distance: ");
280 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
281 fprintf (outf, " )\n");
282 fprintf (outf, " )\n");
285 /* Print the classic direction vector DIRV to OUTF. */
288 print_direction_vector (FILE *outf,
294 for (eq = 0; eq < length; eq++)
296 enum data_dependence_direction dir = ((enum data_dependence_direction)
302 fprintf (outf, " +");
305 fprintf (outf, " -");
308 fprintf (outf, " =");
310 case dir_positive_or_equal:
311 fprintf (outf, " +=");
313 case dir_positive_or_negative:
314 fprintf (outf, " +-");
316 case dir_negative_or_equal:
317 fprintf (outf, " -=");
320 fprintf (outf, " *");
323 fprintf (outf, "indep");
327 fprintf (outf, "\n");
330 /* Print a vector of direction vectors. */
333 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
339 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
340 print_direction_vector (outf, v, length);
343 /* Print out a vector VEC of length N to OUTFILE. */
346 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
350 for (i = 0; i < n; i++)
351 fprintf (outfile, "%3d ", vector[i]);
352 fprintf (outfile, "\n");
355 /* Print a vector of distance vectors. */
358 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
364 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
365 print_lambda_vector (outf, v, length);
371 debug_data_dependence_relation (struct data_dependence_relation *ddr)
373 dump_data_dependence_relation (stderr, ddr);
376 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
379 dump_data_dependence_relation (FILE *outf,
380 struct data_dependence_relation *ddr)
382 struct data_reference *dra, *drb;
384 fprintf (outf, "(Data Dep: \n");
386 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
393 dump_data_reference (outf, dra);
395 fprintf (outf, " (nil)\n");
397 dump_data_reference (outf, drb);
399 fprintf (outf, " (nil)\n");
401 fprintf (outf, " (don't know)\n)\n");
407 dump_data_reference (outf, dra);
408 dump_data_reference (outf, drb);
410 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
411 fprintf (outf, " (no dependence)\n");
413 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
418 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
420 fprintf (outf, " access_fn_A: ");
421 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
422 fprintf (outf, " access_fn_B: ");
423 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
424 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
427 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
428 fprintf (outf, " loop nest: (");
429 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
430 fprintf (outf, "%d ", loopi->num);
431 fprintf (outf, ")\n");
433 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
435 fprintf (outf, " distance_vector: ");
436 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
440 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
442 fprintf (outf, " direction_vector: ");
443 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
448 fprintf (outf, ")\n");
451 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
454 dump_data_dependence_direction (FILE *file,
455 enum data_dependence_direction dir)
471 case dir_positive_or_negative:
472 fprintf (file, "+-");
475 case dir_positive_or_equal:
476 fprintf (file, "+=");
479 case dir_negative_or_equal:
480 fprintf (file, "-=");
492 /* Dumps the distance and direction vectors in FILE. DDRS contains
493 the dependence relations, and VECT_SIZE is the size of the
494 dependence vectors, or in other words the number of loops in the
498 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
501 struct data_dependence_relation *ddr;
504 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
505 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
507 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
509 fprintf (file, "DISTANCE_V (");
510 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
511 fprintf (file, ")\n");
514 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
516 fprintf (file, "DIRECTION_V (");
517 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
518 fprintf (file, ")\n");
522 fprintf (file, "\n\n");
525 /* Dumps the data dependence relations DDRS in FILE. */
528 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
531 struct data_dependence_relation *ddr;
533 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
534 dump_data_dependence_relation (file, ddr);
536 fprintf (file, "\n\n");
539 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
540 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
541 constant of type ssizetype, and returns true. If we cannot do this
542 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
546 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
547 tree *var, tree *off)
551 enum tree_code ocode = code;
559 *var = build_int_cst (type, 0);
560 *off = fold_convert (ssizetype, op0);
563 case POINTER_PLUS_EXPR:
568 split_constant_offset (op0, &var0, &off0);
569 split_constant_offset (op1, &var1, &off1);
570 *var = fold_build2 (code, type, var0, var1);
571 *off = size_binop (ocode, off0, off1);
575 if (TREE_CODE (op1) != INTEGER_CST)
578 split_constant_offset (op0, &var0, &off0);
579 *var = fold_build2 (MULT_EXPR, type, var0, op1);
580 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
586 HOST_WIDE_INT pbitsize, pbitpos;
587 enum machine_mode pmode;
588 int punsignedp, pvolatilep;
590 op0 = TREE_OPERAND (op0, 0);
591 if (!handled_component_p (op0))
594 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
595 &pmode, &punsignedp, &pvolatilep, false);
597 if (pbitpos % BITS_PER_UNIT != 0)
599 base = build_fold_addr_expr (base);
600 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
604 split_constant_offset (poffset, &poffset, &off1);
605 off0 = size_binop (PLUS_EXPR, off0, off1);
606 if (POINTER_TYPE_P (TREE_TYPE (base)))
607 base = fold_build_pointer_plus (base, poffset);
609 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
610 fold_convert (TREE_TYPE (base), poffset));
613 var0 = fold_convert (type, base);
615 /* If variable length types are involved, punt, otherwise casts
616 might be converted into ARRAY_REFs in gimplify_conversion.
617 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
618 possibly no longer appears in current GIMPLE, might resurface.
619 This perhaps could run
620 if (CONVERT_EXPR_P (var0))
622 gimplify_conversion (&var0);
623 // Attempt to fill in any within var0 found ARRAY_REF's
624 // element size from corresponding op embedded ARRAY_REF,
625 // if unsuccessful, just punt.
627 while (POINTER_TYPE_P (type))
628 type = TREE_TYPE (type);
629 if (int_size_in_bytes (type) < 0)
639 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
640 enum tree_code subcode;
642 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
645 var0 = gimple_assign_rhs1 (def_stmt);
646 subcode = gimple_assign_rhs_code (def_stmt);
647 var1 = gimple_assign_rhs2 (def_stmt);
649 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
653 /* We must not introduce undefined overflow, and we must not change the value.
654 Hence we're okay if the inner type doesn't overflow to start with
655 (pointer or signed), the outer type also is an integer or pointer
656 and the outer precision is at least as large as the inner. */
657 tree itype = TREE_TYPE (op0);
658 if ((POINTER_TYPE_P (itype)
659 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
660 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
661 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
663 split_constant_offset (op0, &var0, off);
664 *var = fold_convert (type, var0);
675 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
676 will be ssizetype. */
679 split_constant_offset (tree exp, tree *var, tree *off)
681 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
685 *off = ssize_int (0);
688 if (tree_is_chrec (exp))
691 otype = TREE_TYPE (exp);
692 code = TREE_CODE (exp);
693 extract_ops_from_tree (exp, &code, &op0, &op1);
694 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
696 *var = fold_convert (type, e);
701 /* Returns the address ADDR of an object in a canonical shape (without nop
702 casts, and with type of pointer to the object). */
705 canonicalize_base_object_address (tree addr)
711 /* The base address may be obtained by casting from integer, in that case
713 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
716 if (TREE_CODE (addr) != ADDR_EXPR)
719 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
722 /* Analyzes the behavior of the memory reference DR in the innermost loop or
723 basic block that contains it. Returns true if analysis succeed or false
727 dr_analyze_innermost (struct data_reference *dr)
729 gimple stmt = DR_STMT (dr);
730 struct loop *loop = loop_containing_stmt (stmt);
731 tree ref = DR_REF (dr);
732 HOST_WIDE_INT pbitsize, pbitpos;
734 enum machine_mode pmode;
735 int punsignedp, pvolatilep;
736 affine_iv base_iv, offset_iv;
737 tree init, dinit, step;
738 bool in_loop = (loop && loop->num);
740 if (dump_file && (dump_flags & TDF_DETAILS))
741 fprintf (dump_file, "analyze_innermost: ");
743 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
744 &pmode, &punsignedp, &pvolatilep, false);
745 gcc_assert (base != NULL_TREE);
747 if (pbitpos % BITS_PER_UNIT != 0)
749 if (dump_file && (dump_flags & TDF_DETAILS))
750 fprintf (dump_file, "failed: bit offset alignment.\n");
754 if (TREE_CODE (base) == MEM_REF)
756 if (!integer_zerop (TREE_OPERAND (base, 1)))
760 double_int moff = mem_ref_offset (base);
761 poffset = double_int_to_tree (sizetype, moff);
764 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
766 base = TREE_OPERAND (base, 0);
769 base = build_fold_addr_expr (base);
772 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
775 if (dump_file && (dump_flags & TDF_DETAILS))
776 fprintf (dump_file, "failed: evolution of base is not affine.\n");
783 base_iv.step = ssize_int (0);
784 base_iv.no_overflow = true;
789 offset_iv.base = ssize_int (0);
790 offset_iv.step = ssize_int (0);
796 offset_iv.base = poffset;
797 offset_iv.step = ssize_int (0);
799 else if (!simple_iv (loop, loop_containing_stmt (stmt),
800 poffset, &offset_iv, false))
802 if (dump_file && (dump_flags & TDF_DETAILS))
803 fprintf (dump_file, "failed: evolution of offset is not"
809 init = ssize_int (pbitpos / BITS_PER_UNIT);
810 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
811 init = size_binop (PLUS_EXPR, init, dinit);
812 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
813 init = size_binop (PLUS_EXPR, init, dinit);
815 step = size_binop (PLUS_EXPR,
816 fold_convert (ssizetype, base_iv.step),
817 fold_convert (ssizetype, offset_iv.step));
819 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
821 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
825 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
827 if (dump_file && (dump_flags & TDF_DETAILS))
828 fprintf (dump_file, "success.\n");
833 /* Determines the base object and the list of indices of memory reference
834 DR, analyzed in LOOP and instantiated in loop nest NEST. */
837 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
839 VEC (tree, heap) *access_fns = NULL;
840 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
841 tree base, off, access_fn = NULL_TREE;
842 basic_block before_loop = NULL;
845 before_loop = block_before_loop (nest);
847 while (handled_component_p (aref))
849 if (TREE_CODE (aref) == ARRAY_REF)
851 op = TREE_OPERAND (aref, 1);
854 access_fn = analyze_scalar_evolution (loop, op);
855 access_fn = instantiate_scev (before_loop, loop, access_fn);
856 VEC_safe_push (tree, heap, access_fns, access_fn);
859 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
862 aref = TREE_OPERAND (aref, 0);
866 && (INDIRECT_REF_P (aref)
867 || TREE_CODE (aref) == MEM_REF))
869 op = TREE_OPERAND (aref, 0);
870 access_fn = analyze_scalar_evolution (loop, op);
871 access_fn = instantiate_scev (before_loop, loop, access_fn);
872 base = initial_condition (access_fn);
873 split_constant_offset (base, &base, &off);
874 if (TREE_CODE (aref) == MEM_REF)
875 off = size_binop (PLUS_EXPR, off,
876 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
877 access_fn = chrec_replace_initial_condition (access_fn,
878 fold_convert (TREE_TYPE (base), off));
880 TREE_OPERAND (aref, 0) = base;
881 VEC_safe_push (tree, heap, access_fns, access_fn);
884 if (TREE_CODE (aref) == MEM_REF)
885 TREE_OPERAND (aref, 1)
886 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
888 if (TREE_CODE (ref) == MEM_REF
889 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
890 && integer_zerop (TREE_OPERAND (ref, 1)))
891 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
893 /* For canonicalization purposes we'd like to strip all outermost
894 zero-offset component-refs.
895 ??? For now simply handle zero-index array-refs. */
896 while (TREE_CODE (ref) == ARRAY_REF
897 && integer_zerop (TREE_OPERAND (ref, 1)))
898 ref = TREE_OPERAND (ref, 0);
900 DR_BASE_OBJECT (dr) = ref;
901 DR_ACCESS_FNS (dr) = access_fns;
904 /* Extracts the alias analysis information from the memory reference DR. */
907 dr_analyze_alias (struct data_reference *dr)
909 tree ref = DR_REF (dr);
910 tree base = get_base_address (ref), addr;
912 if (INDIRECT_REF_P (base)
913 || TREE_CODE (base) == MEM_REF)
915 addr = TREE_OPERAND (base, 0);
916 if (TREE_CODE (addr) == SSA_NAME)
917 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
921 /* Frees data reference DR. */
924 free_data_ref (data_reference_p dr)
926 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
930 /* Analyzes memory reference MEMREF accessed in STMT. The reference
931 is read if IS_READ is true, write otherwise. Returns the
932 data_reference description of MEMREF. NEST is the outermost loop
933 in which the reference should be instantiated, LOOP is the loop in
934 which the data reference should be analyzed. */
936 struct data_reference *
937 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
940 struct data_reference *dr;
942 if (dump_file && (dump_flags & TDF_DETAILS))
944 fprintf (dump_file, "Creating dr for ");
945 print_generic_expr (dump_file, memref, TDF_SLIM);
946 fprintf (dump_file, "\n");
949 dr = XCNEW (struct data_reference);
951 DR_REF (dr) = memref;
952 DR_IS_READ (dr) = is_read;
954 dr_analyze_innermost (dr);
955 dr_analyze_indices (dr, nest, loop);
956 dr_analyze_alias (dr);
958 if (dump_file && (dump_flags & TDF_DETAILS))
960 fprintf (dump_file, "\tbase_address: ");
961 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
962 fprintf (dump_file, "\n\toffset from base address: ");
963 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
964 fprintf (dump_file, "\n\tconstant offset from base address: ");
965 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
966 fprintf (dump_file, "\n\tstep: ");
967 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
968 fprintf (dump_file, "\n\taligned to: ");
969 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
970 fprintf (dump_file, "\n\tbase_object: ");
971 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
972 fprintf (dump_file, "\n");
978 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
981 dr_equal_offsets_p1 (tree offset1, tree offset2)
985 STRIP_NOPS (offset1);
986 STRIP_NOPS (offset2);
988 if (offset1 == offset2)
991 if (TREE_CODE (offset1) != TREE_CODE (offset2)
992 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
995 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
996 TREE_OPERAND (offset2, 0));
998 if (!res || !BINARY_CLASS_P (offset1))
1001 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1002 TREE_OPERAND (offset2, 1));
1007 /* Check if DRA and DRB have equal offsets. */
1009 dr_equal_offsets_p (struct data_reference *dra,
1010 struct data_reference *drb)
1012 tree offset1, offset2;
1014 offset1 = DR_OFFSET (dra);
1015 offset2 = DR_OFFSET (drb);
1017 return dr_equal_offsets_p1 (offset1, offset2);
1020 /* Returns true if FNA == FNB. */
1023 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1025 unsigned i, n = VEC_length (tree, fna);
1027 if (n != VEC_length (tree, fnb))
1030 for (i = 0; i < n; i++)
1031 if (!operand_equal_p (VEC_index (tree, fna, i),
1032 VEC_index (tree, fnb, i), 0))
1038 /* If all the functions in CF are the same, returns one of them,
1039 otherwise returns NULL. */
1042 common_affine_function (conflict_function *cf)
1047 if (!CF_NONTRIVIAL_P (cf))
1052 for (i = 1; i < cf->n; i++)
1053 if (!affine_function_equal_p (comm, cf->fns[i]))
1059 /* Returns the base of the affine function FN. */
1062 affine_function_base (affine_fn fn)
1064 return VEC_index (tree, fn, 0);
1067 /* Returns true if FN is a constant. */
1070 affine_function_constant_p (affine_fn fn)
1075 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1076 if (!integer_zerop (coef))
1082 /* Returns true if FN is the zero constant function. */
1085 affine_function_zero_p (affine_fn fn)
1087 return (integer_zerop (affine_function_base (fn))
1088 && affine_function_constant_p (fn));
1091 /* Returns a signed integer type with the largest precision from TA
1095 signed_type_for_types (tree ta, tree tb)
1097 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1098 return signed_type_for (ta);
1100 return signed_type_for (tb);
1103 /* Applies operation OP on affine functions FNA and FNB, and returns the
1107 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1113 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1115 n = VEC_length (tree, fna);
1116 m = VEC_length (tree, fnb);
1120 n = VEC_length (tree, fnb);
1121 m = VEC_length (tree, fna);
1124 ret = VEC_alloc (tree, heap, m);
1125 for (i = 0; i < n; i++)
1127 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1128 TREE_TYPE (VEC_index (tree, fnb, i)));
1130 VEC_quick_push (tree, ret,
1131 fold_build2 (op, type,
1132 VEC_index (tree, fna, i),
1133 VEC_index (tree, fnb, i)));
1136 for (; VEC_iterate (tree, fna, i, coef); i++)
1137 VEC_quick_push (tree, ret,
1138 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1139 coef, integer_zero_node));
1140 for (; VEC_iterate (tree, fnb, i, coef); i++)
1141 VEC_quick_push (tree, ret,
1142 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1143 integer_zero_node, coef));
1148 /* Returns the sum of affine functions FNA and FNB. */
1151 affine_fn_plus (affine_fn fna, affine_fn fnb)
1153 return affine_fn_op (PLUS_EXPR, fna, fnb);
1156 /* Returns the difference of affine functions FNA and FNB. */
1159 affine_fn_minus (affine_fn fna, affine_fn fnb)
1161 return affine_fn_op (MINUS_EXPR, fna, fnb);
1164 /* Frees affine function FN. */
1167 affine_fn_free (affine_fn fn)
1169 VEC_free (tree, heap, fn);
1172 /* Determine for each subscript in the data dependence relation DDR
1176 compute_subscript_distance (struct data_dependence_relation *ddr)
1178 conflict_function *cf_a, *cf_b;
1179 affine_fn fn_a, fn_b, diff;
1181 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1185 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1187 struct subscript *subscript;
1189 subscript = DDR_SUBSCRIPT (ddr, i);
1190 cf_a = SUB_CONFLICTS_IN_A (subscript);
1191 cf_b = SUB_CONFLICTS_IN_B (subscript);
1193 fn_a = common_affine_function (cf_a);
1194 fn_b = common_affine_function (cf_b);
1197 SUB_DISTANCE (subscript) = chrec_dont_know;
1200 diff = affine_fn_minus (fn_a, fn_b);
1202 if (affine_function_constant_p (diff))
1203 SUB_DISTANCE (subscript) = affine_function_base (diff);
1205 SUB_DISTANCE (subscript) = chrec_dont_know;
1207 affine_fn_free (diff);
1212 /* Returns the conflict function for "unknown". */
1214 static conflict_function *
1215 conflict_fn_not_known (void)
1217 conflict_function *fn = XCNEW (conflict_function);
1223 /* Returns the conflict function for "independent". */
1225 static conflict_function *
1226 conflict_fn_no_dependence (void)
1228 conflict_function *fn = XCNEW (conflict_function);
1229 fn->n = NO_DEPENDENCE;
1234 /* Returns true if the address of OBJ is invariant in LOOP. */
1237 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1239 while (handled_component_p (obj))
1241 if (TREE_CODE (obj) == ARRAY_REF)
1243 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1244 need to check the stride and the lower bound of the reference. */
1245 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1247 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1251 else if (TREE_CODE (obj) == COMPONENT_REF)
1253 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1257 obj = TREE_OPERAND (obj, 0);
1260 if (!INDIRECT_REF_P (obj)
1261 && TREE_CODE (obj) != MEM_REF)
1264 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1268 /* Returns false if we can prove that data references A and B do not alias,
1272 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1274 tree addr_a = DR_BASE_OBJECT (a);
1275 tree addr_b = DR_BASE_OBJECT (b);
1277 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1278 return refs_output_dependent_p (addr_a, addr_b);
1279 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1280 return refs_anti_dependent_p (addr_a, addr_b);
1281 return refs_may_alias_p (addr_a, addr_b);
1284 static void compute_self_dependence (struct data_dependence_relation *);
1286 /* Initialize a data dependence relation between data accesses A and
1287 B. NB_LOOPS is the number of loops surrounding the references: the
1288 size of the classic distance/direction vectors. */
1290 static struct data_dependence_relation *
1291 initialize_data_dependence_relation (struct data_reference *a,
1292 struct data_reference *b,
1293 VEC (loop_p, heap) *loop_nest)
1295 struct data_dependence_relation *res;
1298 res = XNEW (struct data_dependence_relation);
1301 DDR_LOOP_NEST (res) = NULL;
1302 DDR_REVERSED_P (res) = false;
1303 DDR_SUBSCRIPTS (res) = NULL;
1304 DDR_DIR_VECTS (res) = NULL;
1305 DDR_DIST_VECTS (res) = NULL;
1307 if (a == NULL || b == NULL)
1309 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1313 /* If the data references do not alias, then they are independent. */
1314 if (!dr_may_alias_p (a, b))
1316 DDR_ARE_DEPENDENT (res) = chrec_known;
1320 /* When the references are exactly the same, don't spend time doing
1321 the data dependence tests, just initialize the ddr and return. */
1322 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1324 DDR_AFFINE_P (res) = true;
1325 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1326 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1327 DDR_LOOP_NEST (res) = loop_nest;
1328 DDR_INNER_LOOP (res) = 0;
1329 DDR_SELF_REFERENCE (res) = true;
1330 compute_self_dependence (res);
1334 /* If the references do not access the same object, we do not know
1335 whether they alias or not. */
1336 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1338 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1342 /* If the base of the object is not invariant in the loop nest, we cannot
1343 analyze it. TODO -- in fact, it would suffice to record that there may
1344 be arbitrary dependences in the loops where the base object varies. */
1346 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1347 DR_BASE_OBJECT (a)))
1349 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1353 /* If the number of dimensions of the access to not agree we can have
1354 a pointer access to a component of the array element type and an
1355 array access while the base-objects are still the same. Punt. */
1356 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1358 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1362 DDR_AFFINE_P (res) = true;
1363 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1364 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1365 DDR_LOOP_NEST (res) = loop_nest;
1366 DDR_INNER_LOOP (res) = 0;
1367 DDR_SELF_REFERENCE (res) = false;
1369 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1371 struct subscript *subscript;
1373 subscript = XNEW (struct subscript);
1374 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1375 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1376 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1377 SUB_DISTANCE (subscript) = chrec_dont_know;
1378 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1384 /* Frees memory used by the conflict function F. */
1387 free_conflict_function (conflict_function *f)
1391 if (CF_NONTRIVIAL_P (f))
1393 for (i = 0; i < f->n; i++)
1394 affine_fn_free (f->fns[i]);
1399 /* Frees memory used by SUBSCRIPTS. */
1402 free_subscripts (VEC (subscript_p, heap) *subscripts)
1407 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1409 free_conflict_function (s->conflicting_iterations_in_a);
1410 free_conflict_function (s->conflicting_iterations_in_b);
1413 VEC_free (subscript_p, heap, subscripts);
1416 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1420 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1423 if (dump_file && (dump_flags & TDF_DETAILS))
1425 fprintf (dump_file, "(dependence classified: ");
1426 print_generic_expr (dump_file, chrec, 0);
1427 fprintf (dump_file, ")\n");
1430 DDR_ARE_DEPENDENT (ddr) = chrec;
1431 free_subscripts (DDR_SUBSCRIPTS (ddr));
1432 DDR_SUBSCRIPTS (ddr) = NULL;
1435 /* The dependence relation DDR cannot be represented by a distance
1439 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1441 if (dump_file && (dump_flags & TDF_DETAILS))
1442 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1444 DDR_AFFINE_P (ddr) = false;
1449 /* This section contains the classic Banerjee tests. */
1451 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1452 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1455 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1457 return (evolution_function_is_constant_p (chrec_a)
1458 && evolution_function_is_constant_p (chrec_b));
1461 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1462 variable, i.e., if the SIV (Single Index Variable) test is true. */
1465 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1467 if ((evolution_function_is_constant_p (chrec_a)
1468 && evolution_function_is_univariate_p (chrec_b))
1469 || (evolution_function_is_constant_p (chrec_b)
1470 && evolution_function_is_univariate_p (chrec_a)))
1473 if (evolution_function_is_univariate_p (chrec_a)
1474 && evolution_function_is_univariate_p (chrec_b))
1476 switch (TREE_CODE (chrec_a))
1478 case POLYNOMIAL_CHREC:
1479 switch (TREE_CODE (chrec_b))
1481 case POLYNOMIAL_CHREC:
1482 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1497 /* Creates a conflict function with N dimensions. The affine functions
1498 in each dimension follow. */
1500 static conflict_function *
1501 conflict_fn (unsigned n, ...)
1504 conflict_function *ret = XCNEW (conflict_function);
1507 gcc_assert (0 < n && n <= MAX_DIM);
1511 for (i = 0; i < n; i++)
1512 ret->fns[i] = va_arg (ap, affine_fn);
1518 /* Returns constant affine function with value CST. */
1521 affine_fn_cst (tree cst)
1523 affine_fn fn = VEC_alloc (tree, heap, 1);
1524 VEC_quick_push (tree, fn, cst);
1528 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1531 affine_fn_univar (tree cst, unsigned dim, tree coef)
1533 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1536 gcc_assert (dim > 0);
1537 VEC_quick_push (tree, fn, cst);
1538 for (i = 1; i < dim; i++)
1539 VEC_quick_push (tree, fn, integer_zero_node);
1540 VEC_quick_push (tree, fn, coef);
1544 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1545 *OVERLAPS_B are initialized to the functions that describe the
1546 relation between the elements accessed twice by CHREC_A and
1547 CHREC_B. For k >= 0, the following property is verified:
1549 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1552 analyze_ziv_subscript (tree chrec_a,
1554 conflict_function **overlaps_a,
1555 conflict_function **overlaps_b,
1556 tree *last_conflicts)
1558 tree type, difference;
1559 dependence_stats.num_ziv++;
1561 if (dump_file && (dump_flags & TDF_DETAILS))
1562 fprintf (dump_file, "(analyze_ziv_subscript \n");
1564 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1565 chrec_a = chrec_convert (type, chrec_a, NULL);
1566 chrec_b = chrec_convert (type, chrec_b, NULL);
1567 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1569 switch (TREE_CODE (difference))
1572 if (integer_zerop (difference))
1574 /* The difference is equal to zero: the accessed index
1575 overlaps for each iteration in the loop. */
1576 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1577 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1578 *last_conflicts = chrec_dont_know;
1579 dependence_stats.num_ziv_dependent++;
1583 /* The accesses do not overlap. */
1584 *overlaps_a = conflict_fn_no_dependence ();
1585 *overlaps_b = conflict_fn_no_dependence ();
1586 *last_conflicts = integer_zero_node;
1587 dependence_stats.num_ziv_independent++;
1592 /* We're not sure whether the indexes overlap. For the moment,
1593 conservatively answer "don't know". */
1594 if (dump_file && (dump_flags & TDF_DETAILS))
1595 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1597 *overlaps_a = conflict_fn_not_known ();
1598 *overlaps_b = conflict_fn_not_known ();
1599 *last_conflicts = chrec_dont_know;
1600 dependence_stats.num_ziv_unimplemented++;
1604 if (dump_file && (dump_flags & TDF_DETAILS))
1605 fprintf (dump_file, ")\n");
1608 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1609 and only if it fits to the int type. If this is not the case, or the
1610 bound on the number of iterations of LOOP could not be derived, returns
1614 max_stmt_executions_tree (struct loop *loop)
1618 if (!max_stmt_executions (loop, true, &nit))
1619 return chrec_dont_know;
1621 if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1622 return chrec_dont_know;
1624 return double_int_to_tree (unsigned_type_node, nit);
1627 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1628 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1629 *OVERLAPS_B are initialized to the functions that describe the
1630 relation between the elements accessed twice by CHREC_A and
1631 CHREC_B. For k >= 0, the following property is verified:
1633 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1636 analyze_siv_subscript_cst_affine (tree chrec_a,
1638 conflict_function **overlaps_a,
1639 conflict_function **overlaps_b,
1640 tree *last_conflicts)
1642 bool value0, value1, value2;
1643 tree type, difference, tmp;
1645 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1646 chrec_a = chrec_convert (type, chrec_a, NULL);
1647 chrec_b = chrec_convert (type, chrec_b, NULL);
1648 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1650 if (!chrec_is_positive (initial_condition (difference), &value0))
1652 if (dump_file && (dump_flags & TDF_DETAILS))
1653 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1655 dependence_stats.num_siv_unimplemented++;
1656 *overlaps_a = conflict_fn_not_known ();
1657 *overlaps_b = conflict_fn_not_known ();
1658 *last_conflicts = chrec_dont_know;
1663 if (value0 == false)
1665 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1667 if (dump_file && (dump_flags & TDF_DETAILS))
1668 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1670 *overlaps_a = conflict_fn_not_known ();
1671 *overlaps_b = conflict_fn_not_known ();
1672 *last_conflicts = chrec_dont_know;
1673 dependence_stats.num_siv_unimplemented++;
1682 chrec_b = {10, +, 1}
1685 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1687 HOST_WIDE_INT numiter;
1688 struct loop *loop = get_chrec_loop (chrec_b);
1690 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1691 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1692 fold_build1 (ABS_EXPR, type, difference),
1693 CHREC_RIGHT (chrec_b));
1694 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1695 *last_conflicts = integer_one_node;
1698 /* Perform weak-zero siv test to see if overlap is
1699 outside the loop bounds. */
1700 numiter = max_stmt_executions_int (loop, true);
1703 && compare_tree_int (tmp, numiter) > 0)
1705 free_conflict_function (*overlaps_a);
1706 free_conflict_function (*overlaps_b);
1707 *overlaps_a = conflict_fn_no_dependence ();
1708 *overlaps_b = conflict_fn_no_dependence ();
1709 *last_conflicts = integer_zero_node;
1710 dependence_stats.num_siv_independent++;
1713 dependence_stats.num_siv_dependent++;
1717 /* When the step does not divide the difference, there are
1721 *overlaps_a = conflict_fn_no_dependence ();
1722 *overlaps_b = conflict_fn_no_dependence ();
1723 *last_conflicts = integer_zero_node;
1724 dependence_stats.num_siv_independent++;
1733 chrec_b = {10, +, -1}
1735 In this case, chrec_a will not overlap with chrec_b. */
1736 *overlaps_a = conflict_fn_no_dependence ();
1737 *overlaps_b = conflict_fn_no_dependence ();
1738 *last_conflicts = integer_zero_node;
1739 dependence_stats.num_siv_independent++;
1746 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1748 if (dump_file && (dump_flags & TDF_DETAILS))
1749 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1751 *overlaps_a = conflict_fn_not_known ();
1752 *overlaps_b = conflict_fn_not_known ();
1753 *last_conflicts = chrec_dont_know;
1754 dependence_stats.num_siv_unimplemented++;
1759 if (value2 == false)
1763 chrec_b = {10, +, -1}
1765 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1767 HOST_WIDE_INT numiter;
1768 struct loop *loop = get_chrec_loop (chrec_b);
1770 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1771 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1772 CHREC_RIGHT (chrec_b));
1773 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1774 *last_conflicts = integer_one_node;
1776 /* Perform weak-zero siv test to see if overlap is
1777 outside the loop bounds. */
1778 numiter = max_stmt_executions_int (loop, true);
1781 && compare_tree_int (tmp, numiter) > 0)
1783 free_conflict_function (*overlaps_a);
1784 free_conflict_function (*overlaps_b);
1785 *overlaps_a = conflict_fn_no_dependence ();
1786 *overlaps_b = conflict_fn_no_dependence ();
1787 *last_conflicts = integer_zero_node;
1788 dependence_stats.num_siv_independent++;
1791 dependence_stats.num_siv_dependent++;
1795 /* When the step does not divide the difference, there
1799 *overlaps_a = conflict_fn_no_dependence ();
1800 *overlaps_b = conflict_fn_no_dependence ();
1801 *last_conflicts = integer_zero_node;
1802 dependence_stats.num_siv_independent++;
1812 In this case, chrec_a will not overlap with chrec_b. */
1813 *overlaps_a = conflict_fn_no_dependence ();
1814 *overlaps_b = conflict_fn_no_dependence ();
1815 *last_conflicts = integer_zero_node;
1816 dependence_stats.num_siv_independent++;
1824 /* Helper recursive function for initializing the matrix A. Returns
1825 the initial value of CHREC. */
1828 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1832 switch (TREE_CODE (chrec))
1834 case POLYNOMIAL_CHREC:
1835 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1837 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1838 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1844 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1845 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1847 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1852 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1853 return chrec_convert (chrec_type (chrec), op, NULL);
1858 /* Handle ~X as -1 - X. */
1859 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1860 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1861 build_int_cst (TREE_TYPE (chrec), -1), op);
1873 #define FLOOR_DIV(x,y) ((x) / (y))
1875 /* Solves the special case of the Diophantine equation:
1876 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1878 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1879 number of iterations that loops X and Y run. The overlaps will be
1880 constructed as evolutions in dimension DIM. */
1883 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1884 affine_fn *overlaps_a,
1885 affine_fn *overlaps_b,
1886 tree *last_conflicts, int dim)
1888 if (((step_a > 0 && step_b > 0)
1889 || (step_a < 0 && step_b < 0)))
1891 int step_overlaps_a, step_overlaps_b;
1892 int gcd_steps_a_b, last_conflict, tau2;
1894 gcd_steps_a_b = gcd (step_a, step_b);
1895 step_overlaps_a = step_b / gcd_steps_a_b;
1896 step_overlaps_b = step_a / gcd_steps_a_b;
1900 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1901 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1902 last_conflict = tau2;
1903 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1906 *last_conflicts = chrec_dont_know;
1908 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1909 build_int_cst (NULL_TREE,
1911 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1912 build_int_cst (NULL_TREE,
1918 *overlaps_a = affine_fn_cst (integer_zero_node);
1919 *overlaps_b = affine_fn_cst (integer_zero_node);
1920 *last_conflicts = integer_zero_node;
1924 /* Solves the special case of a Diophantine equation where CHREC_A is
1925 an affine bivariate function, and CHREC_B is an affine univariate
1926 function. For example,
1928 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1930 has the following overlapping functions:
1932 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1933 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1934 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1936 FORNOW: This is a specialized implementation for a case occurring in
1937 a common benchmark. Implement the general algorithm. */
1940 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1941 conflict_function **overlaps_a,
1942 conflict_function **overlaps_b,
1943 tree *last_conflicts)
1945 bool xz_p, yz_p, xyz_p;
1946 int step_x, step_y, step_z;
1947 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1948 affine_fn overlaps_a_xz, overlaps_b_xz;
1949 affine_fn overlaps_a_yz, overlaps_b_yz;
1950 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1951 affine_fn ova1, ova2, ovb;
1952 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1954 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1955 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1956 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1959 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
1960 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
1961 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
1963 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1965 if (dump_file && (dump_flags & TDF_DETAILS))
1966 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1968 *overlaps_a = conflict_fn_not_known ();
1969 *overlaps_b = conflict_fn_not_known ();
1970 *last_conflicts = chrec_dont_know;
1974 niter = MIN (niter_x, niter_z);
1975 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1978 &last_conflicts_xz, 1);
1979 niter = MIN (niter_y, niter_z);
1980 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1983 &last_conflicts_yz, 2);
1984 niter = MIN (niter_x, niter_z);
1985 niter = MIN (niter_y, niter);
1986 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1989 &last_conflicts_xyz, 3);
1991 xz_p = !integer_zerop (last_conflicts_xz);
1992 yz_p = !integer_zerop (last_conflicts_yz);
1993 xyz_p = !integer_zerop (last_conflicts_xyz);
1995 if (xz_p || yz_p || xyz_p)
1997 ova1 = affine_fn_cst (integer_zero_node);
1998 ova2 = affine_fn_cst (integer_zero_node);
1999 ovb = affine_fn_cst (integer_zero_node);
2002 affine_fn t0 = ova1;
2005 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2006 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2007 affine_fn_free (t0);
2008 affine_fn_free (t2);
2009 *last_conflicts = last_conflicts_xz;
2013 affine_fn t0 = ova2;
2016 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2017 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2018 affine_fn_free (t0);
2019 affine_fn_free (t2);
2020 *last_conflicts = last_conflicts_yz;
2024 affine_fn t0 = ova1;
2025 affine_fn t2 = ova2;
2028 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2029 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2030 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2031 affine_fn_free (t0);
2032 affine_fn_free (t2);
2033 affine_fn_free (t4);
2034 *last_conflicts = last_conflicts_xyz;
2036 *overlaps_a = conflict_fn (2, ova1, ova2);
2037 *overlaps_b = conflict_fn (1, ovb);
2041 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2042 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2043 *last_conflicts = integer_zero_node;
2046 affine_fn_free (overlaps_a_xz);
2047 affine_fn_free (overlaps_b_xz);
2048 affine_fn_free (overlaps_a_yz);
2049 affine_fn_free (overlaps_b_yz);
2050 affine_fn_free (overlaps_a_xyz);
2051 affine_fn_free (overlaps_b_xyz);
2054 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2057 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2060 memcpy (vec2, vec1, size * sizeof (*vec1));
2063 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2066 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2071 for (i = 0; i < m; i++)
2072 lambda_vector_copy (mat1[i], mat2[i], n);
2075 /* Store the N x N identity matrix in MAT. */
2078 lambda_matrix_id (lambda_matrix mat, int size)
2082 for (i = 0; i < size; i++)
2083 for (j = 0; j < size; j++)
2084 mat[i][j] = (i == j) ? 1 : 0;
2087 /* Return the first nonzero element of vector VEC1 between START and N.
2088 We must have START <= N. Returns N if VEC1 is the zero vector. */
2091 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2094 while (j < n && vec1[j] == 0)
2099 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2100 R2 = R2 + CONST1 * R1. */
2103 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2110 for (i = 0; i < n; i++)
2111 mat[r2][i] += const1 * mat[r1][i];
2114 /* Swap rows R1 and R2 in matrix MAT. */
2117 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2126 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2127 and store the result in VEC2. */
2130 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2131 int size, int const1)
2136 lambda_vector_clear (vec2, size);
2138 for (i = 0; i < size; i++)
2139 vec2[i] = const1 * vec1[i];
2142 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2145 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2148 lambda_vector_mult_const (vec1, vec2, size, -1);
2151 /* Negate row R1 of matrix MAT which has N columns. */
2154 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2156 lambda_vector_negate (mat[r1], mat[r1], n);
2159 /* Return true if two vectors are equal. */
2162 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2165 for (i = 0; i < size; i++)
2166 if (vec1[i] != vec2[i])
2171 /* Given an M x N integer matrix A, this function determines an M x
2172 M unimodular matrix U, and an M x N echelon matrix S such that
2173 "U.A = S". This decomposition is also known as "right Hermite".
2175 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2176 Restructuring Compilers" Utpal Banerjee. */
2179 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2180 lambda_matrix S, lambda_matrix U)
2184 lambda_matrix_copy (A, S, m, n);
2185 lambda_matrix_id (U, m);
2187 for (j = 0; j < n; j++)
2189 if (lambda_vector_first_nz (S[j], m, i0) < m)
2192 for (i = m - 1; i >= i0; i--)
2194 while (S[i][j] != 0)
2196 int sigma, factor, a, b;
2200 sigma = (a * b < 0) ? -1: 1;
2203 factor = sigma * (a / b);
2205 lambda_matrix_row_add (S, n, i, i-1, -factor);
2206 lambda_matrix_row_exchange (S, i, i-1);
2208 lambda_matrix_row_add (U, m, i, i-1, -factor);
2209 lambda_matrix_row_exchange (U, i, i-1);
2216 /* Determines the overlapping elements due to accesses CHREC_A and
2217 CHREC_B, that are affine functions. This function cannot handle
2218 symbolic evolution functions, ie. when initial conditions are
2219 parameters, because it uses lambda matrices of integers. */
2222 analyze_subscript_affine_affine (tree chrec_a,
2224 conflict_function **overlaps_a,
2225 conflict_function **overlaps_b,
2226 tree *last_conflicts)
2228 unsigned nb_vars_a, nb_vars_b, dim;
2229 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2230 lambda_matrix A, U, S;
2231 struct obstack scratch_obstack;
2233 if (eq_evolutions_p (chrec_a, chrec_b))
2235 /* The accessed index overlaps for each iteration in the
2237 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2238 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2239 *last_conflicts = chrec_dont_know;
2242 if (dump_file && (dump_flags & TDF_DETAILS))
2243 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2245 /* For determining the initial intersection, we have to solve a
2246 Diophantine equation. This is the most time consuming part.
2248 For answering to the question: "Is there a dependence?" we have
2249 to prove that there exists a solution to the Diophantine
2250 equation, and that the solution is in the iteration domain,
2251 i.e. the solution is positive or zero, and that the solution
2252 happens before the upper bound loop.nb_iterations. Otherwise
2253 there is no dependence. This function outputs a description of
2254 the iterations that hold the intersections. */
2256 nb_vars_a = nb_vars_in_chrec (chrec_a);
2257 nb_vars_b = nb_vars_in_chrec (chrec_b);
2259 gcc_obstack_init (&scratch_obstack);
2261 dim = nb_vars_a + nb_vars_b;
2262 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2263 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2264 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2266 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2267 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2268 gamma = init_b - init_a;
2270 /* Don't do all the hard work of solving the Diophantine equation
2271 when we already know the solution: for example,
2274 | gamma = 3 - 3 = 0.
2275 Then the first overlap occurs during the first iterations:
2276 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2280 if (nb_vars_a == 1 && nb_vars_b == 1)
2282 HOST_WIDE_INT step_a, step_b;
2283 HOST_WIDE_INT niter, niter_a, niter_b;
2286 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2287 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2288 niter = MIN (niter_a, niter_b);
2289 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2290 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2292 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2295 *overlaps_a = conflict_fn (1, ova);
2296 *overlaps_b = conflict_fn (1, ovb);
2299 else if (nb_vars_a == 2 && nb_vars_b == 1)
2300 compute_overlap_steps_for_affine_1_2
2301 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2303 else if (nb_vars_a == 1 && nb_vars_b == 2)
2304 compute_overlap_steps_for_affine_1_2
2305 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2309 if (dump_file && (dump_flags & TDF_DETAILS))
2310 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2311 *overlaps_a = conflict_fn_not_known ();
2312 *overlaps_b = conflict_fn_not_known ();
2313 *last_conflicts = chrec_dont_know;
2315 goto end_analyze_subs_aa;
2319 lambda_matrix_right_hermite (A, dim, 1, S, U);
2324 lambda_matrix_row_negate (U, dim, 0);
2326 gcd_alpha_beta = S[0][0];
2328 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2329 but that is a quite strange case. Instead of ICEing, answer
2331 if (gcd_alpha_beta == 0)
2333 *overlaps_a = conflict_fn_not_known ();
2334 *overlaps_b = conflict_fn_not_known ();
2335 *last_conflicts = chrec_dont_know;
2336 goto end_analyze_subs_aa;
2339 /* The classic "gcd-test". */
2340 if (!int_divides_p (gcd_alpha_beta, gamma))
2342 /* The "gcd-test" has determined that there is no integer
2343 solution, i.e. there is no dependence. */
2344 *overlaps_a = conflict_fn_no_dependence ();
2345 *overlaps_b = conflict_fn_no_dependence ();
2346 *last_conflicts = integer_zero_node;
2349 /* Both access functions are univariate. This includes SIV and MIV cases. */
2350 else if (nb_vars_a == 1 && nb_vars_b == 1)
2352 /* Both functions should have the same evolution sign. */
2353 if (((A[0][0] > 0 && -A[1][0] > 0)
2354 || (A[0][0] < 0 && -A[1][0] < 0)))
2356 /* The solutions are given by:
2358 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2361 For a given integer t. Using the following variables,
2363 | i0 = u11 * gamma / gcd_alpha_beta
2364 | j0 = u12 * gamma / gcd_alpha_beta
2371 | y0 = j0 + j1 * t. */
2372 HOST_WIDE_INT i0, j0, i1, j1;
2374 i0 = U[0][0] * gamma / gcd_alpha_beta;
2375 j0 = U[0][1] * gamma / gcd_alpha_beta;
2379 if ((i1 == 0 && i0 < 0)
2380 || (j1 == 0 && j0 < 0))
2382 /* There is no solution.
2383 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2384 falls in here, but for the moment we don't look at the
2385 upper bound of the iteration domain. */
2386 *overlaps_a = conflict_fn_no_dependence ();
2387 *overlaps_b = conflict_fn_no_dependence ();
2388 *last_conflicts = integer_zero_node;
2389 goto end_analyze_subs_aa;
2392 if (i1 > 0 && j1 > 0)
2394 HOST_WIDE_INT niter_a = max_stmt_executions_int
2395 (get_chrec_loop (chrec_a), true);
2396 HOST_WIDE_INT niter_b = max_stmt_executions_int
2397 (get_chrec_loop (chrec_b), true);
2398 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2400 /* (X0, Y0) is a solution of the Diophantine equation:
2401 "chrec_a (X0) = chrec_b (Y0)". */
2402 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2404 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2405 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2407 /* (X1, Y1) is the smallest positive solution of the eq
2408 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2409 first conflict occurs. */
2410 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2411 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2412 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2416 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2417 FLOOR_DIV (niter - j0, j1));
2418 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2420 /* If the overlap occurs outside of the bounds of the
2421 loop, there is no dependence. */
2422 if (x1 >= niter || y1 >= niter)
2424 *overlaps_a = conflict_fn_no_dependence ();
2425 *overlaps_b = conflict_fn_no_dependence ();
2426 *last_conflicts = integer_zero_node;
2427 goto end_analyze_subs_aa;
2430 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2433 *last_conflicts = chrec_dont_know;
2437 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2439 build_int_cst (NULL_TREE, i1)));
2442 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2444 build_int_cst (NULL_TREE, j1)));
2448 /* FIXME: For the moment, the upper bound of the
2449 iteration domain for i and j is not checked. */
2450 if (dump_file && (dump_flags & TDF_DETAILS))
2451 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2452 *overlaps_a = conflict_fn_not_known ();
2453 *overlaps_b = conflict_fn_not_known ();
2454 *last_conflicts = chrec_dont_know;
2459 if (dump_file && (dump_flags & TDF_DETAILS))
2460 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2461 *overlaps_a = conflict_fn_not_known ();
2462 *overlaps_b = conflict_fn_not_known ();
2463 *last_conflicts = chrec_dont_know;
2468 if (dump_file && (dump_flags & TDF_DETAILS))
2469 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2470 *overlaps_a = conflict_fn_not_known ();
2471 *overlaps_b = conflict_fn_not_known ();
2472 *last_conflicts = chrec_dont_know;
2475 end_analyze_subs_aa:
2476 obstack_free (&scratch_obstack, NULL);
2477 if (dump_file && (dump_flags & TDF_DETAILS))
2479 fprintf (dump_file, " (overlaps_a = ");
2480 dump_conflict_function (dump_file, *overlaps_a);
2481 fprintf (dump_file, ")\n (overlaps_b = ");
2482 dump_conflict_function (dump_file, *overlaps_b);
2483 fprintf (dump_file, ")\n");
2484 fprintf (dump_file, ")\n");
2488 /* Returns true when analyze_subscript_affine_affine can be used for
2489 determining the dependence relation between chrec_a and chrec_b,
2490 that contain symbols. This function modifies chrec_a and chrec_b
2491 such that the analysis result is the same, and such that they don't
2492 contain symbols, and then can safely be passed to the analyzer.
2494 Example: The analysis of the following tuples of evolutions produce
2495 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2498 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2499 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2503 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2505 tree diff, type, left_a, left_b, right_b;
2507 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2508 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2509 /* FIXME: For the moment not handled. Might be refined later. */
2512 type = chrec_type (*chrec_a);
2513 left_a = CHREC_LEFT (*chrec_a);
2514 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2515 diff = chrec_fold_minus (type, left_a, left_b);
2517 if (!evolution_function_is_constant_p (diff))
2520 if (dump_file && (dump_flags & TDF_DETAILS))
2521 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2523 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2524 diff, CHREC_RIGHT (*chrec_a));
2525 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2526 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2527 build_int_cst (type, 0),
2532 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2533 *OVERLAPS_B are initialized to the functions that describe the
2534 relation between the elements accessed twice by CHREC_A and
2535 CHREC_B. For k >= 0, the following property is verified:
2537 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2540 analyze_siv_subscript (tree chrec_a,
2542 conflict_function **overlaps_a,
2543 conflict_function **overlaps_b,
2544 tree *last_conflicts,
2547 dependence_stats.num_siv++;
2549 if (dump_file && (dump_flags & TDF_DETAILS))
2550 fprintf (dump_file, "(analyze_siv_subscript \n");
2552 if (evolution_function_is_constant_p (chrec_a)
2553 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2554 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2555 overlaps_a, overlaps_b, last_conflicts);
2557 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2558 && evolution_function_is_constant_p (chrec_b))
2559 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2560 overlaps_b, overlaps_a, last_conflicts);
2562 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2563 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2565 if (!chrec_contains_symbols (chrec_a)
2566 && !chrec_contains_symbols (chrec_b))
2568 analyze_subscript_affine_affine (chrec_a, chrec_b,
2569 overlaps_a, overlaps_b,
2572 if (CF_NOT_KNOWN_P (*overlaps_a)
2573 || CF_NOT_KNOWN_P (*overlaps_b))
2574 dependence_stats.num_siv_unimplemented++;
2575 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2576 || CF_NO_DEPENDENCE_P (*overlaps_b))
2577 dependence_stats.num_siv_independent++;
2579 dependence_stats.num_siv_dependent++;
2581 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2584 analyze_subscript_affine_affine (chrec_a, chrec_b,
2585 overlaps_a, overlaps_b,
2588 if (CF_NOT_KNOWN_P (*overlaps_a)
2589 || CF_NOT_KNOWN_P (*overlaps_b))
2590 dependence_stats.num_siv_unimplemented++;
2591 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2592 || CF_NO_DEPENDENCE_P (*overlaps_b))
2593 dependence_stats.num_siv_independent++;
2595 dependence_stats.num_siv_dependent++;
2598 goto siv_subscript_dontknow;
2603 siv_subscript_dontknow:;
2604 if (dump_file && (dump_flags & TDF_DETAILS))
2605 fprintf (dump_file, "siv test failed: unimplemented.\n");
2606 *overlaps_a = conflict_fn_not_known ();
2607 *overlaps_b = conflict_fn_not_known ();
2608 *last_conflicts = chrec_dont_know;
2609 dependence_stats.num_siv_unimplemented++;
2612 if (dump_file && (dump_flags & TDF_DETAILS))
2613 fprintf (dump_file, ")\n");
2616 /* Returns false if we can prove that the greatest common divisor of the steps
2617 of CHREC does not divide CST, false otherwise. */
2620 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2622 HOST_WIDE_INT cd = 0, val;
2625 if (!host_integerp (cst, 0))
2627 val = tree_low_cst (cst, 0);
2629 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2631 step = CHREC_RIGHT (chrec);
2632 if (!host_integerp (step, 0))
2634 cd = gcd (cd, tree_low_cst (step, 0));
2635 chrec = CHREC_LEFT (chrec);
2638 return val % cd == 0;
2641 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2642 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2643 functions that describe the relation between the elements accessed
2644 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2647 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2650 analyze_miv_subscript (tree chrec_a,
2652 conflict_function **overlaps_a,
2653 conflict_function **overlaps_b,
2654 tree *last_conflicts,
2655 struct loop *loop_nest)
2657 tree type, difference;
2659 dependence_stats.num_miv++;
2660 if (dump_file && (dump_flags & TDF_DETAILS))
2661 fprintf (dump_file, "(analyze_miv_subscript \n");
2663 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2664 chrec_a = chrec_convert (type, chrec_a, NULL);
2665 chrec_b = chrec_convert (type, chrec_b, NULL);
2666 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2668 if (eq_evolutions_p (chrec_a, chrec_b))
2670 /* Access functions are the same: all the elements are accessed
2671 in the same order. */
2672 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2673 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2674 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2675 dependence_stats.num_miv_dependent++;
2678 else if (evolution_function_is_constant_p (difference)
2679 /* For the moment, the following is verified:
2680 evolution_function_is_affine_multivariate_p (chrec_a,
2682 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2684 /* testsuite/.../ssa-chrec-33.c
2685 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2687 The difference is 1, and all the evolution steps are multiples
2688 of 2, consequently there are no overlapping elements. */
2689 *overlaps_a = conflict_fn_no_dependence ();
2690 *overlaps_b = conflict_fn_no_dependence ();
2691 *last_conflicts = integer_zero_node;
2692 dependence_stats.num_miv_independent++;
2695 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2696 && !chrec_contains_symbols (chrec_a)
2697 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2698 && !chrec_contains_symbols (chrec_b))
2700 /* testsuite/.../ssa-chrec-35.c
2701 {0, +, 1}_2 vs. {0, +, 1}_3
2702 the overlapping elements are respectively located at iterations:
2703 {0, +, 1}_x and {0, +, 1}_x,
2704 in other words, we have the equality:
2705 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2708 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2709 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2711 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2712 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2714 analyze_subscript_affine_affine (chrec_a, chrec_b,
2715 overlaps_a, overlaps_b, last_conflicts);
2717 if (CF_NOT_KNOWN_P (*overlaps_a)
2718 || CF_NOT_KNOWN_P (*overlaps_b))
2719 dependence_stats.num_miv_unimplemented++;
2720 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2721 || CF_NO_DEPENDENCE_P (*overlaps_b))
2722 dependence_stats.num_miv_independent++;
2724 dependence_stats.num_miv_dependent++;
2729 /* When the analysis is too difficult, answer "don't know". */
2730 if (dump_file && (dump_flags & TDF_DETAILS))
2731 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2733 *overlaps_a = conflict_fn_not_known ();
2734 *overlaps_b = conflict_fn_not_known ();
2735 *last_conflicts = chrec_dont_know;
2736 dependence_stats.num_miv_unimplemented++;
2739 if (dump_file && (dump_flags & TDF_DETAILS))
2740 fprintf (dump_file, ")\n");
2743 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2744 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2745 OVERLAP_ITERATIONS_B are initialized with two functions that
2746 describe the iterations that contain conflicting elements.
2748 Remark: For an integer k >= 0, the following equality is true:
2750 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2754 analyze_overlapping_iterations (tree chrec_a,
2756 conflict_function **overlap_iterations_a,
2757 conflict_function **overlap_iterations_b,
2758 tree *last_conflicts, struct loop *loop_nest)
2760 unsigned int lnn = loop_nest->num;
2762 dependence_stats.num_subscript_tests++;
2764 if (dump_file && (dump_flags & TDF_DETAILS))
2766 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2767 fprintf (dump_file, " (chrec_a = ");
2768 print_generic_expr (dump_file, chrec_a, 0);
2769 fprintf (dump_file, ")\n (chrec_b = ");
2770 print_generic_expr (dump_file, chrec_b, 0);
2771 fprintf (dump_file, ")\n");
2774 if (chrec_a == NULL_TREE
2775 || chrec_b == NULL_TREE
2776 || chrec_contains_undetermined (chrec_a)
2777 || chrec_contains_undetermined (chrec_b))
2779 dependence_stats.num_subscript_undetermined++;
2781 *overlap_iterations_a = conflict_fn_not_known ();
2782 *overlap_iterations_b = conflict_fn_not_known ();
2785 /* If they are the same chrec, and are affine, they overlap
2786 on every iteration. */
2787 else if (eq_evolutions_p (chrec_a, chrec_b)
2788 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2789 || operand_equal_p (chrec_a, chrec_b, 0)))
2791 dependence_stats.num_same_subscript_function++;
2792 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2793 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2794 *last_conflicts = chrec_dont_know;
2797 /* If they aren't the same, and aren't affine, we can't do anything
2799 else if ((chrec_contains_symbols (chrec_a)
2800 || chrec_contains_symbols (chrec_b))
2801 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2802 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2804 dependence_stats.num_subscript_undetermined++;
2805 *overlap_iterations_a = conflict_fn_not_known ();
2806 *overlap_iterations_b = conflict_fn_not_known ();
2809 else if (ziv_subscript_p (chrec_a, chrec_b))
2810 analyze_ziv_subscript (chrec_a, chrec_b,
2811 overlap_iterations_a, overlap_iterations_b,
2814 else if (siv_subscript_p (chrec_a, chrec_b))
2815 analyze_siv_subscript (chrec_a, chrec_b,
2816 overlap_iterations_a, overlap_iterations_b,
2817 last_conflicts, lnn);
2820 analyze_miv_subscript (chrec_a, chrec_b,
2821 overlap_iterations_a, overlap_iterations_b,
2822 last_conflicts, loop_nest);
2824 if (dump_file && (dump_flags & TDF_DETAILS))
2826 fprintf (dump_file, " (overlap_iterations_a = ");
2827 dump_conflict_function (dump_file, *overlap_iterations_a);
2828 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2829 dump_conflict_function (dump_file, *overlap_iterations_b);
2830 fprintf (dump_file, ")\n");
2831 fprintf (dump_file, ")\n");
2835 /* Helper function for uniquely inserting distance vectors. */
2838 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2843 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2844 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2847 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2850 /* Helper function for uniquely inserting direction vectors. */
2853 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2858 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2859 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2862 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2865 /* Add a distance of 1 on all the loops outer than INDEX. If we
2866 haven't yet determined a distance for this outer loop, push a new
2867 distance vector composed of the previous distance, and a distance
2868 of 1 for this outer loop. Example:
2876 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2877 save (0, 1), then we have to save (1, 0). */
2880 add_outer_distances (struct data_dependence_relation *ddr,
2881 lambda_vector dist_v, int index)
2883 /* For each outer loop where init_v is not set, the accesses are
2884 in dependence of distance 1 in the loop. */
2885 while (--index >= 0)
2887 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2888 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2890 save_dist_v (ddr, save_v);
2894 /* Return false when fail to represent the data dependence as a
2895 distance vector. INIT_B is set to true when a component has been
2896 added to the distance vector DIST_V. INDEX_CARRY is then set to
2897 the index in DIST_V that carries the dependence. */
2900 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2901 struct data_reference *ddr_a,
2902 struct data_reference *ddr_b,
2903 lambda_vector dist_v, bool *init_b,
2907 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2909 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2911 tree access_fn_a, access_fn_b;
2912 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2914 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2916 non_affine_dependence_relation (ddr);
2920 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2921 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2923 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2924 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2927 int var_a = CHREC_VARIABLE (access_fn_a);
2928 int var_b = CHREC_VARIABLE (access_fn_b);
2931 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2933 non_affine_dependence_relation (ddr);
2937 dist = int_cst_value (SUB_DISTANCE (subscript));
2938 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
2939 *index_carry = MIN (index, *index_carry);
2941 /* This is the subscript coupling test. If we have already
2942 recorded a distance for this loop (a distance coming from
2943 another subscript), it should be the same. For example,
2944 in the following code, there is no dependence:
2951 if (init_v[index] != 0 && dist_v[index] != dist)
2953 finalize_ddr_dependent (ddr, chrec_known);
2957 dist_v[index] = dist;
2961 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2963 /* This can be for example an affine vs. constant dependence
2964 (T[i] vs. T[3]) that is not an affine dependence and is
2965 not representable as a distance vector. */
2966 non_affine_dependence_relation (ddr);
2974 /* Return true when the DDR contains only constant access functions. */
2977 constant_access_functions (const struct data_dependence_relation *ddr)
2981 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2982 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2983 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2989 /* Helper function for the case where DDR_A and DDR_B are the same
2990 multivariate access function with a constant step. For an example
2994 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2997 tree c_1 = CHREC_LEFT (c_2);
2998 tree c_0 = CHREC_LEFT (c_1);
2999 lambda_vector dist_v;
3002 /* Polynomials with more than 2 variables are not handled yet. When
3003 the evolution steps are parameters, it is not possible to
3004 represent the dependence using classical distance vectors. */
3005 if (TREE_CODE (c_0) != INTEGER_CST
3006 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3007 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3009 DDR_AFFINE_P (ddr) = false;
3013 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3014 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3016 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3017 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3018 v1 = int_cst_value (CHREC_RIGHT (c_1));
3019 v2 = int_cst_value (CHREC_RIGHT (c_2));
3032 save_dist_v (ddr, dist_v);
3034 add_outer_distances (ddr, dist_v, x_1);
3037 /* Helper function for the case where DDR_A and DDR_B are the same
3038 access functions. */
3041 add_other_self_distances (struct data_dependence_relation *ddr)
3043 lambda_vector dist_v;
3045 int index_carry = DDR_NB_LOOPS (ddr);
3047 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3049 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3051 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3053 if (!evolution_function_is_univariate_p (access_fun))
3055 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3057 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3061 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3063 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3064 add_multivariate_self_dist (ddr, access_fun);
3066 /* The evolution step is not constant: it varies in
3067 the outer loop, so this cannot be represented by a
3068 distance vector. For example in pr34635.c the
3069 evolution is {0, +, {0, +, 4}_1}_2. */
3070 DDR_AFFINE_P (ddr) = false;
3075 index_carry = MIN (index_carry,
3076 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3077 DDR_LOOP_NEST (ddr)));
3081 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3082 add_outer_distances (ddr, dist_v, index_carry);
3086 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3088 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3090 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3091 save_dist_v (ddr, dist_v);
3094 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3095 is the case for example when access functions are the same and
3096 equal to a constant, as in:
3103 in which case the distance vectors are (0) and (1). */
3106 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3110 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3112 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3113 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3114 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3116 for (j = 0; j < ca->n; j++)
3117 if (affine_function_zero_p (ca->fns[j]))
3119 insert_innermost_unit_dist_vector (ddr);
3123 for (j = 0; j < cb->n; j++)
3124 if (affine_function_zero_p (cb->fns[j]))
3126 insert_innermost_unit_dist_vector (ddr);
3132 /* Compute the classic per loop distance vector. DDR is the data
3133 dependence relation to build a vector from. Return false when fail
3134 to represent the data dependence as a distance vector. */
3137 build_classic_dist_vector (struct data_dependence_relation *ddr,
3138 struct loop *loop_nest)
3140 bool init_b = false;
3141 int index_carry = DDR_NB_LOOPS (ddr);
3142 lambda_vector dist_v;
3144 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3147 if (same_access_functions (ddr))
3149 /* Save the 0 vector. */
3150 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3151 save_dist_v (ddr, dist_v);
3153 if (constant_access_functions (ddr))
3154 add_distance_for_zero_overlaps (ddr);
3156 if (DDR_NB_LOOPS (ddr) > 1)
3157 add_other_self_distances (ddr);
3162 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3163 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3164 dist_v, &init_b, &index_carry))
3167 /* Save the distance vector if we initialized one. */
3170 /* Verify a basic constraint: classic distance vectors should
3171 always be lexicographically positive.
3173 Data references are collected in the order of execution of
3174 the program, thus for the following loop
3176 | for (i = 1; i < 100; i++)
3177 | for (j = 1; j < 100; j++)
3179 | t = T[j+1][i-1]; // A
3180 | T[j][i] = t + 2; // B
3183 references are collected following the direction of the wind:
3184 A then B. The data dependence tests are performed also
3185 following this order, such that we're looking at the distance
3186 separating the elements accessed by A from the elements later
3187 accessed by B. But in this example, the distance returned by
3188 test_dep (A, B) is lexicographically negative (-1, 1), that
3189 means that the access A occurs later than B with respect to
3190 the outer loop, ie. we're actually looking upwind. In this
3191 case we solve test_dep (B, A) looking downwind to the
3192 lexicographically positive solution, that returns the
3193 distance vector (1, -1). */
3194 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3196 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3197 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3200 compute_subscript_distance (ddr);
3201 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3202 save_v, &init_b, &index_carry))
3204 save_dist_v (ddr, save_v);
3205 DDR_REVERSED_P (ddr) = true;
3207 /* In this case there is a dependence forward for all the
3210 | for (k = 1; k < 100; k++)
3211 | for (i = 1; i < 100; i++)
3212 | for (j = 1; j < 100; j++)
3214 | t = T[j+1][i-1]; // A
3215 | T[j][i] = t + 2; // B
3223 if (DDR_NB_LOOPS (ddr) > 1)
3225 add_outer_distances (ddr, save_v, index_carry);
3226 add_outer_distances (ddr, dist_v, index_carry);
3231 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3232 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3234 if (DDR_NB_LOOPS (ddr) > 1)
3236 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3238 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3239 DDR_A (ddr), loop_nest))
3241 compute_subscript_distance (ddr);
3242 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3243 opposite_v, &init_b,
3247 save_dist_v (ddr, save_v);
3248 add_outer_distances (ddr, dist_v, index_carry);
3249 add_outer_distances (ddr, opposite_v, index_carry);
3252 save_dist_v (ddr, save_v);
3257 /* There is a distance of 1 on all the outer loops: Example:
3258 there is a dependence of distance 1 on loop_1 for the array A.
3264 add_outer_distances (ddr, dist_v,
3265 lambda_vector_first_nz (dist_v,
3266 DDR_NB_LOOPS (ddr), 0));
3269 if (dump_file && (dump_flags & TDF_DETAILS))
3273 fprintf (dump_file, "(build_classic_dist_vector\n");
3274 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3276 fprintf (dump_file, " dist_vector = (");
3277 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3278 DDR_NB_LOOPS (ddr));
3279 fprintf (dump_file, " )\n");
3281 fprintf (dump_file, ")\n");
3287 /* Return the direction for a given distance.
3288 FIXME: Computing dir this way is suboptimal, since dir can catch
3289 cases that dist is unable to represent. */
3291 static inline enum data_dependence_direction
3292 dir_from_dist (int dist)
3295 return dir_positive;
3297 return dir_negative;
3302 /* Compute the classic per loop direction vector. DDR is the data
3303 dependence relation to build a vector from. */
3306 build_classic_dir_vector (struct data_dependence_relation *ddr)
3309 lambda_vector dist_v;
3311 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3313 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3315 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3316 dir_v[j] = dir_from_dist (dist_v[j]);
3318 save_dir_v (ddr, dir_v);
3322 /* Helper function. Returns true when there is a dependence between
3323 data references DRA and DRB. */
3326 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3327 struct data_reference *dra,
3328 struct data_reference *drb,
3329 struct loop *loop_nest)
3332 tree last_conflicts;
3333 struct subscript *subscript;
3335 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3338 conflict_function *overlaps_a, *overlaps_b;
3340 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3341 DR_ACCESS_FN (drb, i),
3342 &overlaps_a, &overlaps_b,
3343 &last_conflicts, loop_nest);
3345 if (CF_NOT_KNOWN_P (overlaps_a)
3346 || CF_NOT_KNOWN_P (overlaps_b))
3348 finalize_ddr_dependent (ddr, chrec_dont_know);
3349 dependence_stats.num_dependence_undetermined++;
3350 free_conflict_function (overlaps_a);
3351 free_conflict_function (overlaps_b);
3355 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3356 || CF_NO_DEPENDENCE_P (overlaps_b))
3358 finalize_ddr_dependent (ddr, chrec_known);
3359 dependence_stats.num_dependence_independent++;
3360 free_conflict_function (overlaps_a);
3361 free_conflict_function (overlaps_b);
3367 if (SUB_CONFLICTS_IN_A (subscript))
3368 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3369 if (SUB_CONFLICTS_IN_B (subscript))
3370 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3372 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3373 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3374 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3381 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3384 subscript_dependence_tester (struct data_dependence_relation *ddr,
3385 struct loop *loop_nest)
3388 if (dump_file && (dump_flags & TDF_DETAILS))
3389 fprintf (dump_file, "(subscript_dependence_tester \n");
3391 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3392 dependence_stats.num_dependence_dependent++;
3394 compute_subscript_distance (ddr);
3395 if (build_classic_dist_vector (ddr, loop_nest))
3396 build_classic_dir_vector (ddr);
3398 if (dump_file && (dump_flags & TDF_DETAILS))
3399 fprintf (dump_file, ")\n");
3402 /* Returns true when all the access functions of A are affine or
3403 constant with respect to LOOP_NEST. */
3406 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3407 const struct loop *loop_nest)
3410 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3413 FOR_EACH_VEC_ELT (tree, fns, i, t)
3414 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3415 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3421 /* Initializes an equation for an OMEGA problem using the information
3422 contained in the ACCESS_FUN. Returns true when the operation
3425 PB is the omega constraint system.
3426 EQ is the number of the equation to be initialized.
3427 OFFSET is used for shifting the variables names in the constraints:
3428 a constrain is composed of 2 * the number of variables surrounding
3429 dependence accesses. OFFSET is set either to 0 for the first n variables,
3430 then it is set to n.
3431 ACCESS_FUN is expected to be an affine chrec. */
3434 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3435 unsigned int offset, tree access_fun,
3436 struct data_dependence_relation *ddr)
3438 switch (TREE_CODE (access_fun))
3440 case POLYNOMIAL_CHREC:
3442 tree left = CHREC_LEFT (access_fun);
3443 tree right = CHREC_RIGHT (access_fun);
3444 int var = CHREC_VARIABLE (access_fun);
3447 if (TREE_CODE (right) != INTEGER_CST)
3450 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3451 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3453 /* Compute the innermost loop index. */
3454 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3457 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3458 += int_cst_value (right);
3460 switch (TREE_CODE (left))
3462 case POLYNOMIAL_CHREC:
3463 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3466 pb->eqs[eq].coef[0] += int_cst_value (left);
3475 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3483 /* As explained in the comments preceding init_omega_for_ddr, we have
3484 to set up a system for each loop level, setting outer loops
3485 variation to zero, and current loop variation to positive or zero.
3486 Save each lexico positive distance vector. */
3489 omega_extract_distance_vectors (omega_pb pb,
3490 struct data_dependence_relation *ddr)
3494 struct loop *loopi, *loopj;
3495 enum omega_result res;
3497 /* Set a new problem for each loop in the nest. The basis is the
3498 problem that we have initialized until now. On top of this we
3499 add new constraints. */
3500 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3501 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3504 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3505 DDR_NB_LOOPS (ddr));
3507 omega_copy_problem (copy, pb);
3509 /* For all the outer loops "loop_j", add "dj = 0". */
3511 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3513 eq = omega_add_zero_eq (copy, omega_black);
3514 copy->eqs[eq].coef[j + 1] = 1;
3517 /* For "loop_i", add "0 <= di". */
3518 geq = omega_add_zero_geq (copy, omega_black);
3519 copy->geqs[geq].coef[i + 1] = 1;
3521 /* Reduce the constraint system, and test that the current
3522 problem is feasible. */
3523 res = omega_simplify_problem (copy);
3524 if (res == omega_false
3525 || res == omega_unknown
3526 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3529 for (eq = 0; eq < copy->num_subs; eq++)
3530 if (copy->subs[eq].key == (int) i + 1)
3532 dist = copy->subs[eq].coef[0];
3538 /* Reinitialize problem... */
3539 omega_copy_problem (copy, pb);
3541 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3543 eq = omega_add_zero_eq (copy, omega_black);
3544 copy->eqs[eq].coef[j + 1] = 1;
3547 /* ..., but this time "di = 1". */
3548 eq = omega_add_zero_eq (copy, omega_black);
3549 copy->eqs[eq].coef[i + 1] = 1;
3550 copy->eqs[eq].coef[0] = -1;
3552 res = omega_simplify_problem (copy);
3553 if (res == omega_false
3554 || res == omega_unknown
3555 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3558 for (eq = 0; eq < copy->num_subs; eq++)
3559 if (copy->subs[eq].key == (int) i + 1)
3561 dist = copy->subs[eq].coef[0];
3567 /* Save the lexicographically positive distance vector. */
3570 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3571 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3575 for (eq = 0; eq < copy->num_subs; eq++)
3576 if (copy->subs[eq].key > 0)
3578 dist = copy->subs[eq].coef[0];
3579 dist_v[copy->subs[eq].key - 1] = dist;
3582 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3583 dir_v[j] = dir_from_dist (dist_v[j]);
3585 save_dist_v (ddr, dist_v);
3586 save_dir_v (ddr, dir_v);
3590 omega_free_problem (copy);
3594 /* This is called for each subscript of a tuple of data references:
3595 insert an equality for representing the conflicts. */
3598 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3599 struct data_dependence_relation *ddr,
3600 omega_pb pb, bool *maybe_dependent)
3603 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3604 TREE_TYPE (access_fun_b));
3605 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3606 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3607 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3610 /* When the fun_a - fun_b is not constant, the dependence is not
3611 captured by the classic distance vector representation. */
3612 if (TREE_CODE (difference) != INTEGER_CST)
3616 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3618 /* There is no dependence. */
3619 *maybe_dependent = false;
3623 minus_one = build_int_cst (type, -1);
3624 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3626 eq = omega_add_zero_eq (pb, omega_black);
3627 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3628 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3629 /* There is probably a dependence, but the system of
3630 constraints cannot be built: answer "don't know". */
3634 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3635 && !int_divides_p (lambda_vector_gcd
3636 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3637 2 * DDR_NB_LOOPS (ddr)),
3638 pb->eqs[eq].coef[0]))
3640 /* There is no dependence. */
3641 *maybe_dependent = false;
3648 /* Helper function, same as init_omega_for_ddr but specialized for
3649 data references A and B. */
3652 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3653 struct data_dependence_relation *ddr,
3654 omega_pb pb, bool *maybe_dependent)
3659 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3661 /* Insert an equality per subscript. */
3662 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3664 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3665 ddr, pb, maybe_dependent))
3667 else if (*maybe_dependent == false)
3669 /* There is no dependence. */
3670 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3675 /* Insert inequalities: constraints corresponding to the iteration
3676 domain, i.e. the loops surrounding the references "loop_x" and
3677 the distance variables "dx". The layout of the OMEGA
3678 representation is as follows:
3679 - coef[0] is the constant
3680 - coef[1..nb_loops] are the protected variables that will not be
3681 removed by the solver: the "dx"
3682 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3684 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3685 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3687 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3690 ineq = omega_add_zero_geq (pb, omega_black);
3691 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3693 /* 0 <= loop_x + dx */
3694 ineq = omega_add_zero_geq (pb, omega_black);
3695 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3696 pb->geqs[ineq].coef[i + 1] = 1;
3700 /* loop_x <= nb_iters */
3701 ineq = omega_add_zero_geq (pb, omega_black);
3702 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3703 pb->geqs[ineq].coef[0] = nbi;
3705 /* loop_x + dx <= nb_iters */
3706 ineq = omega_add_zero_geq (pb, omega_black);
3707 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3708 pb->geqs[ineq].coef[i + 1] = -1;
3709 pb->geqs[ineq].coef[0] = nbi;
3711 /* A step "dx" bigger than nb_iters is not feasible, so
3712 add "0 <= nb_iters + dx", */
3713 ineq = omega_add_zero_geq (pb, omega_black);
3714 pb->geqs[ineq].coef[i + 1] = 1;
3715 pb->geqs[ineq].coef[0] = nbi;
3716 /* and "dx <= nb_iters". */
3717 ineq = omega_add_zero_geq (pb, omega_black);
3718 pb->geqs[ineq].coef[i + 1] = -1;
3719 pb->geqs[ineq].coef[0] = nbi;
3723 omega_extract_distance_vectors (pb, ddr);
3728 /* Sets up the Omega dependence problem for the data dependence
3729 relation DDR. Returns false when the constraint system cannot be
3730 built, ie. when the test answers "don't know". Returns true
3731 otherwise, and when independence has been proved (using one of the
3732 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3733 set MAYBE_DEPENDENT to true.
3735 Example: for setting up the dependence system corresponding to the
3736 conflicting accesses
3741 | ... A[2*j, 2*(i + j)]
3745 the following constraints come from the iteration domain:
3752 where di, dj are the distance variables. The constraints
3753 representing the conflicting elements are:
3756 i + 1 = 2 * (i + di + j + dj)
3758 For asking that the resulting distance vector (di, dj) be
3759 lexicographically positive, we insert the constraint "di >= 0". If
3760 "di = 0" in the solution, we fix that component to zero, and we
3761 look at the inner loops: we set a new problem where all the outer
3762 loop distances are zero, and fix this inner component to be
3763 positive. When one of the components is positive, we save that
3764 distance, and set a new problem where the distance on this loop is
3765 zero, searching for other distances in the inner loops. Here is
3766 the classic example that illustrates that we have to set for each
3767 inner loop a new problem:
3775 we have to save two distances (1, 0) and (0, 1).
3777 Given two array references, refA and refB, we have to set the
3778 dependence problem twice, refA vs. refB and refB vs. refA, and we
3779 cannot do a single test, as refB might occur before refA in the
3780 inner loops, and the contrary when considering outer loops: ex.
3785 | T[{1,+,1}_2][{1,+,1}_1] // refA
3786 | T[{2,+,1}_2][{0,+,1}_1] // refB
3791 refB touches the elements in T before refA, and thus for the same
3792 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3793 but for successive loop_0 iterations, we have (1, -1, 1)
3795 The Omega solver expects the distance variables ("di" in the
3796 previous example) to come first in the constraint system (as
3797 variables to be protected, or "safe" variables), the constraint
3798 system is built using the following layout:
3800 "cst | distance vars | index vars".
3804 init_omega_for_ddr (struct data_dependence_relation *ddr,
3805 bool *maybe_dependent)
3810 *maybe_dependent = true;
3812 if (same_access_functions (ddr))
3815 lambda_vector dir_v;
3817 /* Save the 0 vector. */
3818 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3819 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3820 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3821 dir_v[j] = dir_equal;
3822 save_dir_v (ddr, dir_v);
3824 /* Save the dependences carried by outer loops. */
3825 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3826 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3828 omega_free_problem (pb);
3832 /* Omega expects the protected variables (those that have to be kept
3833 after elimination) to appear first in the constraint system.
3834 These variables are the distance variables. In the following
3835 initialization we declare NB_LOOPS safe variables, and the total
3836 number of variables for the constraint system is 2*NB_LOOPS. */
3837 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3838 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3840 omega_free_problem (pb);
3842 /* Stop computation if not decidable, or no dependence. */
3843 if (res == false || *maybe_dependent == false)
3846 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3847 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3849 omega_free_problem (pb);
3854 /* Return true when DDR contains the same information as that stored
3855 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3858 ddr_consistent_p (FILE *file,
3859 struct data_dependence_relation *ddr,
3860 VEC (lambda_vector, heap) *dist_vects,
3861 VEC (lambda_vector, heap) *dir_vects)
3865 /* If dump_file is set, output there. */
3866 if (dump_file && (dump_flags & TDF_DETAILS))
3869 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3871 lambda_vector b_dist_v;
3872 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3873 VEC_length (lambda_vector, dist_vects),
3874 DDR_NUM_DIST_VECTS (ddr));
3876 fprintf (file, "Banerjee dist vectors:\n");
3877 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3878 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3880 fprintf (file, "Omega dist vectors:\n");
3881 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3882 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3884 fprintf (file, "data dependence relation:\n");
3885 dump_data_dependence_relation (file, ddr);
3887 fprintf (file, ")\n");
3891 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3893 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3894 VEC_length (lambda_vector, dir_vects),
3895 DDR_NUM_DIR_VECTS (ddr));
3899 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3901 lambda_vector a_dist_v;
3902 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3904 /* Distance vectors are not ordered in the same way in the DDR
3905 and in the DIST_VECTS: search for a matching vector. */
3906 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3907 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3910 if (j == VEC_length (lambda_vector, dist_vects))
3912 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3913 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3914 fprintf (file, "not found in Omega dist vectors:\n");
3915 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3916 fprintf (file, "data dependence relation:\n");
3917 dump_data_dependence_relation (file, ddr);
3918 fprintf (file, ")\n");
3922 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3924 lambda_vector a_dir_v;
3925 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3927 /* Direction vectors are not ordered in the same way in the DDR
3928 and in the DIR_VECTS: search for a matching vector. */
3929 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3930 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3933 if (j == VEC_length (lambda_vector, dist_vects))
3935 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3936 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3937 fprintf (file, "not found in Omega dir vectors:\n");
3938 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3939 fprintf (file, "data dependence relation:\n");
3940 dump_data_dependence_relation (file, ddr);
3941 fprintf (file, ")\n");
3948 /* This computes the affine dependence relation between A and B with
3949 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3950 independence between two accesses, while CHREC_DONT_KNOW is used
3951 for representing the unknown relation.
3953 Note that it is possible to stop the computation of the dependence
3954 relation the first time we detect a CHREC_KNOWN element for a given
3958 compute_affine_dependence (struct data_dependence_relation *ddr,
3959 struct loop *loop_nest)
3961 struct data_reference *dra = DDR_A (ddr);
3962 struct data_reference *drb = DDR_B (ddr);
3964 if (dump_file && (dump_flags & TDF_DETAILS))
3966 fprintf (dump_file, "(compute_affine_dependence\n");
3967 fprintf (dump_file, " (stmt_a = \n");
3968 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3969 fprintf (dump_file, ")\n (stmt_b = \n");
3970 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3971 fprintf (dump_file, ")\n");
3974 /* Analyze only when the dependence relation is not yet known. */
3975 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3976 && !DDR_SELF_REFERENCE (ddr))
3978 dependence_stats.num_dependence_tests++;
3980 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3981 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3983 if (flag_check_data_deps)
3985 /* Compute the dependences using the first algorithm. */
3986 subscript_dependence_tester (ddr, loop_nest);
3988 if (dump_file && (dump_flags & TDF_DETAILS))
3990 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3991 dump_data_dependence_relation (dump_file, ddr);
3994 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3996 bool maybe_dependent;
3997 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3999 /* Save the result of the first DD analyzer. */
4000 dist_vects = DDR_DIST_VECTS (ddr);
4001 dir_vects = DDR_DIR_VECTS (ddr);
4003 /* Reset the information. */
4004 DDR_DIST_VECTS (ddr) = NULL;
4005 DDR_DIR_VECTS (ddr) = NULL;
4007 /* Compute the same information using Omega. */
4008 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4009 goto csys_dont_know;
4011 if (dump_file && (dump_flags & TDF_DETAILS))
4013 fprintf (dump_file, "Omega Analyzer\n");
4014 dump_data_dependence_relation (dump_file, ddr);
4017 /* Check that we get the same information. */
4018 if (maybe_dependent)
4019 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4024 subscript_dependence_tester (ddr, loop_nest);
4027 /* As a last case, if the dependence cannot be determined, or if
4028 the dependence is considered too difficult to determine, answer
4033 dependence_stats.num_dependence_undetermined++;
4035 if (dump_file && (dump_flags & TDF_DETAILS))
4037 fprintf (dump_file, "Data ref a:\n");
4038 dump_data_reference (dump_file, dra);
4039 fprintf (dump_file, "Data ref b:\n");
4040 dump_data_reference (dump_file, drb);
4041 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4043 finalize_ddr_dependent (ddr, chrec_dont_know);
4047 if (dump_file && (dump_flags & TDF_DETAILS))
4048 fprintf (dump_file, ")\n");
4051 /* This computes the dependence relation for the same data
4052 reference into DDR. */
4055 compute_self_dependence (struct data_dependence_relation *ddr)
4058 struct subscript *subscript;
4060 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4063 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4066 if (SUB_CONFLICTS_IN_A (subscript))
4067 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4068 if (SUB_CONFLICTS_IN_B (subscript))
4069 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4071 /* The accessed index overlaps for each iteration. */
4072 SUB_CONFLICTS_IN_A (subscript)
4073 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4074 SUB_CONFLICTS_IN_B (subscript)
4075 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4076 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4079 /* The distance vector is the zero vector. */
4080 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4081 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4084 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4085 the data references in DATAREFS, in the LOOP_NEST. When
4086 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4090 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4091 VEC (ddr_p, heap) **dependence_relations,
4092 VEC (loop_p, heap) *loop_nest,
4093 bool compute_self_and_rr)
4095 struct data_dependence_relation *ddr;
4096 struct data_reference *a, *b;
4099 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4100 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4101 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4103 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4104 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4106 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4109 if (compute_self_and_rr)
4110 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4112 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4113 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4114 compute_self_dependence (ddr);
4118 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4119 true if STMT clobbers memory, false otherwise. */
4122 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4124 bool clobbers_memory = false;
4127 enum gimple_code stmt_code = gimple_code (stmt);
4131 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4132 Calls have side-effects, except those to const or pure
4134 if ((stmt_code == GIMPLE_CALL
4135 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4136 || (stmt_code == GIMPLE_ASM
4137 && gimple_asm_volatile_p (stmt)))
4138 clobbers_memory = true;
4140 if (!gimple_vuse (stmt))
4141 return clobbers_memory;
4143 if (stmt_code == GIMPLE_ASSIGN)
4146 op0 = gimple_assign_lhs_ptr (stmt);
4147 op1 = gimple_assign_rhs1_ptr (stmt);
4150 || (REFERENCE_CLASS_P (*op1)
4151 && (base = get_base_address (*op1))
4152 && TREE_CODE (base) != SSA_NAME))
4154 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4156 ref->is_read = true;
4159 else if (stmt_code == GIMPLE_CALL)
4163 op0 = gimple_call_lhs_ptr (stmt);
4164 n = gimple_call_num_args (stmt);
4165 for (i = 0; i < n; i++)
4167 op1 = gimple_call_arg_ptr (stmt, i);
4170 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4172 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4174 ref->is_read = true;
4179 return clobbers_memory;
4183 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4185 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4187 ref->is_read = false;
4189 return clobbers_memory;
4192 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4193 reference, returns false, otherwise returns true. NEST is the outermost
4194 loop of the loop nest in which the references should be analyzed. */
4197 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4198 VEC (data_reference_p, heap) **datarefs)
4201 VEC (data_ref_loc, heap) *references;
4204 data_reference_p dr;
4206 if (get_references_in_stmt (stmt, &references))
4208 VEC_free (data_ref_loc, heap, references);
4212 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4214 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4215 *ref->pos, stmt, ref->is_read);
4216 gcc_assert (dr != NULL);
4217 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4219 VEC_free (data_ref_loc, heap, references);
4223 /* Stores the data references in STMT to DATAREFS. If there is an
4224 unanalyzable reference, returns false, otherwise returns true.
4225 NEST is the outermost loop of the loop nest in which the references
4226 should be instantiated, LOOP is the loop in which the references
4227 should be analyzed. */
4230 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4231 VEC (data_reference_p, heap) **datarefs)
4234 VEC (data_ref_loc, heap) *references;
4237 data_reference_p dr;
4239 if (get_references_in_stmt (stmt, &references))
4241 VEC_free (data_ref_loc, heap, references);
4245 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4247 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4248 gcc_assert (dr != NULL);
4249 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4252 VEC_free (data_ref_loc, heap, references);
4256 /* Search the data references in LOOP, and record the information into
4257 DATAREFS. Returns chrec_dont_know when failing to analyze a
4258 difficult case, returns NULL_TREE otherwise. */
4261 find_data_references_in_bb (struct loop *loop, basic_block bb,
4262 VEC (data_reference_p, heap) **datarefs)
4264 gimple_stmt_iterator bsi;
4266 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4268 gimple stmt = gsi_stmt (bsi);
4270 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4272 struct data_reference *res;
4273 res = XCNEW (struct data_reference);
4274 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4276 return chrec_dont_know;
4283 /* Search the data references in LOOP, and record the information into
4284 DATAREFS. Returns chrec_dont_know when failing to analyze a
4285 difficult case, returns NULL_TREE otherwise.
4287 TODO: This function should be made smarter so that it can handle address
4288 arithmetic as if they were array accesses, etc. */
4291 find_data_references_in_loop (struct loop *loop,
4292 VEC (data_reference_p, heap) **datarefs)
4294 basic_block bb, *bbs;
4297 bbs = get_loop_body_in_dom_order (loop);
4299 for (i = 0; i < loop->num_nodes; i++)
4303 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4306 return chrec_dont_know;
4314 /* Recursive helper function. */
4317 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4319 /* Inner loops of the nest should not contain siblings. Example:
4320 when there are two consecutive loops,
4331 the dependence relation cannot be captured by the distance
4336 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4338 return find_loop_nest_1 (loop->inner, loop_nest);
4342 /* Return false when the LOOP is not well nested. Otherwise return
4343 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4344 contain the loops from the outermost to the innermost, as they will
4345 appear in the classic distance vector. */
4348 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4350 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4352 return find_loop_nest_1 (loop->inner, loop_nest);
4356 /* Returns true when the data dependences have been computed, false otherwise.
4357 Given a loop nest LOOP, the following vectors are returned:
4358 DATAREFS is initialized to all the array elements contained in this loop,
4359 DEPENDENCE_RELATIONS contains the relations between the data references.
4360 Compute read-read and self relations if
4361 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4364 compute_data_dependences_for_loop (struct loop *loop,
4365 bool compute_self_and_read_read_dependences,
4366 VEC (loop_p, heap) **loop_nest,
4367 VEC (data_reference_p, heap) **datarefs,
4368 VEC (ddr_p, heap) **dependence_relations)
4372 memset (&dependence_stats, 0, sizeof (dependence_stats));
4374 /* If the loop nest is not well formed, or one of the data references
4375 is not computable, give up without spending time to compute other
4378 || !find_loop_nest (loop, loop_nest)
4379 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4381 struct data_dependence_relation *ddr;
4383 /* Insert a single relation into dependence_relations:
4385 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4386 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4390 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4391 compute_self_and_read_read_dependences);
4393 if (dump_file && (dump_flags & TDF_STATS))
4395 fprintf (dump_file, "Dependence tester statistics:\n");
4397 fprintf (dump_file, "Number of dependence tests: %d\n",
4398 dependence_stats.num_dependence_tests);
4399 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4400 dependence_stats.num_dependence_dependent);
4401 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4402 dependence_stats.num_dependence_independent);
4403 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4404 dependence_stats.num_dependence_undetermined);
4406 fprintf (dump_file, "Number of subscript tests: %d\n",
4407 dependence_stats.num_subscript_tests);
4408 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4409 dependence_stats.num_subscript_undetermined);
4410 fprintf (dump_file, "Number of same subscript function: %d\n",
4411 dependence_stats.num_same_subscript_function);
4413 fprintf (dump_file, "Number of ziv tests: %d\n",
4414 dependence_stats.num_ziv);
4415 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4416 dependence_stats.num_ziv_dependent);
4417 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4418 dependence_stats.num_ziv_independent);
4419 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4420 dependence_stats.num_ziv_unimplemented);
4422 fprintf (dump_file, "Number of siv tests: %d\n",
4423 dependence_stats.num_siv);
4424 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4425 dependence_stats.num_siv_dependent);
4426 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4427 dependence_stats.num_siv_independent);
4428 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4429 dependence_stats.num_siv_unimplemented);
4431 fprintf (dump_file, "Number of miv tests: %d\n",
4432 dependence_stats.num_miv);
4433 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4434 dependence_stats.num_miv_dependent);
4435 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4436 dependence_stats.num_miv_independent);
4437 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4438 dependence_stats.num_miv_unimplemented);
4444 /* Returns true when the data dependences for the basic block BB have been
4445 computed, false otherwise.
4446 DATAREFS is initialized to all the array elements contained in this basic
4447 block, DEPENDENCE_RELATIONS contains the relations between the data
4448 references. Compute read-read and self relations if
4449 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4451 compute_data_dependences_for_bb (basic_block bb,
4452 bool compute_self_and_read_read_dependences,
4453 VEC (data_reference_p, heap) **datarefs,
4454 VEC (ddr_p, heap) **dependence_relations)
4456 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4459 compute_all_dependences (*datarefs, dependence_relations, NULL,
4460 compute_self_and_read_read_dependences);
4464 /* Entry point (for testing only). Analyze all the data references
4465 and the dependence relations in LOOP.
4467 The data references are computed first.
4469 A relation on these nodes is represented by a complete graph. Some
4470 of the relations could be of no interest, thus the relations can be
4473 In the following function we compute all the relations. This is
4474 just a first implementation that is here for:
4475 - for showing how to ask for the dependence relations,
4476 - for the debugging the whole dependence graph,
4477 - for the dejagnu testcases and maintenance.
4479 It is possible to ask only for a part of the graph, avoiding to
4480 compute the whole dependence graph. The computed dependences are
4481 stored in a knowledge base (KB) such that later queries don't
4482 recompute the same information. The implementation of this KB is
4483 transparent to the optimizer, and thus the KB can be changed with a
4484 more efficient implementation, or the KB could be disabled. */
4486 analyze_all_data_dependences (struct loop *loop)
4489 int nb_data_refs = 10;
4490 VEC (data_reference_p, heap) *datarefs =
4491 VEC_alloc (data_reference_p, heap, nb_data_refs);
4492 VEC (ddr_p, heap) *dependence_relations =
4493 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4494 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4496 /* Compute DDs on the whole function. */
4497 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4498 &dependence_relations);
4502 dump_data_dependence_relations (dump_file, dependence_relations);
4503 fprintf (dump_file, "\n\n");
4505 if (dump_flags & TDF_DETAILS)
4506 dump_dist_dir_vectors (dump_file, dependence_relations);
4508 if (dump_flags & TDF_STATS)
4510 unsigned nb_top_relations = 0;
4511 unsigned nb_bot_relations = 0;
4512 unsigned nb_chrec_relations = 0;
4513 struct data_dependence_relation *ddr;
4515 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4517 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4520 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4524 nb_chrec_relations++;
4527 gather_stats_on_scev_database ();
4531 VEC_free (loop_p, heap, loop_nest);
4532 free_dependence_relations (dependence_relations);
4533 free_data_refs (datarefs);
4536 /* Computes all the data dependences and check that the results of
4537 several analyzers are the same. */
4540 tree_check_data_deps (void)
4543 struct loop *loop_nest;
4545 FOR_EACH_LOOP (li, loop_nest, 0)
4546 analyze_all_data_dependences (loop_nest);
4549 /* Free the memory used by a data dependence relation DDR. */
4552 free_dependence_relation (struct data_dependence_relation *ddr)
4557 if (DDR_SUBSCRIPTS (ddr))
4558 free_subscripts (DDR_SUBSCRIPTS (ddr));
4559 if (DDR_DIST_VECTS (ddr))
4560 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4561 if (DDR_DIR_VECTS (ddr))
4562 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4567 /* Free the memory used by the data dependence relations from
4568 DEPENDENCE_RELATIONS. */
4571 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4574 struct data_dependence_relation *ddr;
4576 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4578 free_dependence_relation (ddr);
4580 VEC_free (ddr_p, heap, dependence_relations);
4583 /* Free the memory used by the data references from DATAREFS. */
4586 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4589 struct data_reference *dr;
4591 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4593 VEC_free (data_reference_p, heap, datarefs);
4598 /* Dump vertex I in RDG to FILE. */
4601 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4603 struct vertex *v = &(rdg->vertices[i]);
4604 struct graph_edge *e;
4606 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4607 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4608 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4611 for (e = v->pred; e; e = e->pred_next)
4612 fprintf (file, " %d", e->src);
4614 fprintf (file, ") (out:");
4617 for (e = v->succ; e; e = e->succ_next)
4618 fprintf (file, " %d", e->dest);
4620 fprintf (file, ")\n");
4621 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4622 fprintf (file, ")\n");
4625 /* Call dump_rdg_vertex on stderr. */
4628 debug_rdg_vertex (struct graph *rdg, int i)
4630 dump_rdg_vertex (stderr, rdg, i);
4633 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4634 dumped vertices to that bitmap. */
4636 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4640 fprintf (file, "(%d\n", c);
4642 for (i = 0; i < rdg->n_vertices; i++)
4643 if (rdg->vertices[i].component == c)
4646 bitmap_set_bit (dumped, i);
4648 dump_rdg_vertex (file, rdg, i);
4651 fprintf (file, ")\n");
4654 /* Call dump_rdg_vertex on stderr. */
4657 debug_rdg_component (struct graph *rdg, int c)
4659 dump_rdg_component (stderr, rdg, c, NULL);
4662 /* Dump the reduced dependence graph RDG to FILE. */
4665 dump_rdg (FILE *file, struct graph *rdg)
4668 bitmap dumped = BITMAP_ALLOC (NULL);
4670 fprintf (file, "(rdg\n");
4672 for (i = 0; i < rdg->n_vertices; i++)
4673 if (!bitmap_bit_p (dumped, i))
4674 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4676 fprintf (file, ")\n");
4677 BITMAP_FREE (dumped);
4680 /* Call dump_rdg on stderr. */
4683 debug_rdg (struct graph *rdg)
4685 dump_rdg (stderr, rdg);
4689 dot_rdg_1 (FILE *file, struct graph *rdg)
4693 fprintf (file, "digraph RDG {\n");
4695 for (i = 0; i < rdg->n_vertices; i++)
4697 struct vertex *v = &(rdg->vertices[i]);
4698 struct graph_edge *e;
4700 /* Highlight reads from memory. */
4701 if (RDG_MEM_READS_STMT (rdg, i))
4702 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4704 /* Highlight stores to memory. */
4705 if (RDG_MEM_WRITE_STMT (rdg, i))
4706 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4709 for (e = v->succ; e; e = e->succ_next)
4710 switch (RDGE_TYPE (e))
4713 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4717 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4721 /* These are the most common dependences: don't print these. */
4722 fprintf (file, "%d -> %d \n", i, e->dest);
4726 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4734 fprintf (file, "}\n\n");
4737 /* Display the Reduced Dependence Graph using dotty. */
4738 extern void dot_rdg (struct graph *);
4741 dot_rdg (struct graph *rdg)
4743 /* When debugging, enable the following code. This cannot be used
4744 in production compilers because it calls "system". */
4746 FILE *file = fopen ("/tmp/rdg.dot", "w");
4747 gcc_assert (file != NULL);
4749 dot_rdg_1 (file, rdg);
4752 system ("dotty /tmp/rdg.dot &");
4754 dot_rdg_1 (stderr, rdg);
4758 /* This structure is used for recording the mapping statement index in
4761 struct GTY(()) rdg_vertex_info
4767 /* Returns the index of STMT in RDG. */
4770 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4772 struct rdg_vertex_info rvi, *slot;
4775 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4783 /* Creates an edge in RDG for each distance vector from DDR. The
4784 order that we keep track of in the RDG is the order in which
4785 statements have to be executed. */
4788 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4790 struct graph_edge *e;
4792 data_reference_p dra = DDR_A (ddr);
4793 data_reference_p drb = DDR_B (ddr);
4794 unsigned level = ddr_dependence_level (ddr);
4796 /* For non scalar dependences, when the dependence is REVERSED,
4797 statement B has to be executed before statement A. */
4799 && !DDR_REVERSED_P (ddr))
4801 data_reference_p tmp = dra;
4806 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4807 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4809 if (va < 0 || vb < 0)
4812 e = add_edge (rdg, va, vb);
4813 e->data = XNEW (struct rdg_edge);
4815 RDGE_LEVEL (e) = level;
4816 RDGE_RELATION (e) = ddr;
4818 /* Determines the type of the data dependence. */
4819 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4820 RDGE_TYPE (e) = input_dd;
4821 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4822 RDGE_TYPE (e) = output_dd;
4823 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4824 RDGE_TYPE (e) = flow_dd;
4825 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4826 RDGE_TYPE (e) = anti_dd;
4829 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4830 the index of DEF in RDG. */
4833 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4835 use_operand_p imm_use_p;
4836 imm_use_iterator iterator;
4838 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4840 struct graph_edge *e;
4841 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4846 e = add_edge (rdg, idef, use);
4847 e->data = XNEW (struct rdg_edge);
4848 RDGE_TYPE (e) = flow_dd;
4849 RDGE_RELATION (e) = NULL;
4853 /* Creates the edges of the reduced dependence graph RDG. */
4856 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4859 struct data_dependence_relation *ddr;
4860 def_operand_p def_p;
4863 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4864 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4865 create_rdg_edge_for_ddr (rdg, ddr);
4867 for (i = 0; i < rdg->n_vertices; i++)
4868 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4870 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4873 /* Build the vertices of the reduced dependence graph RDG. */
4876 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4881 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4883 VEC (data_ref_loc, heap) *references;
4885 struct vertex *v = &(rdg->vertices[i]);
4886 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4887 struct rdg_vertex_info **slot;
4891 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4898 v->data = XNEW (struct rdg_vertex);
4899 RDG_STMT (rdg, i) = stmt;
4901 RDG_MEM_WRITE_STMT (rdg, i) = false;
4902 RDG_MEM_READS_STMT (rdg, i) = false;
4903 if (gimple_code (stmt) == GIMPLE_PHI)
4906 get_references_in_stmt (stmt, &references);
4907 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4909 RDG_MEM_WRITE_STMT (rdg, i) = true;
4911 RDG_MEM_READS_STMT (rdg, i) = true;
4913 VEC_free (data_ref_loc, heap, references);
4917 /* Initialize STMTS with all the statements of LOOP. When
4918 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4919 which we discover statements is important as
4920 generate_loops_for_partition is using the same traversal for
4921 identifying statements. */
4924 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4927 basic_block *bbs = get_loop_body_in_dom_order (loop);
4929 for (i = 0; i < loop->num_nodes; i++)
4931 basic_block bb = bbs[i];
4932 gimple_stmt_iterator bsi;
4935 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4936 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4938 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4940 stmt = gsi_stmt (bsi);
4941 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
4942 VEC_safe_push (gimple, heap, *stmts, stmt);
4949 /* Returns true when all the dependences are computable. */
4952 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4957 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4958 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4964 /* Computes a hash function for element ELT. */
4967 hash_stmt_vertex_info (const void *elt)
4969 const struct rdg_vertex_info *const rvi =
4970 (const struct rdg_vertex_info *) elt;
4971 gimple stmt = rvi->stmt;
4973 return htab_hash_pointer (stmt);
4976 /* Compares database elements E1 and E2. */
4979 eq_stmt_vertex_info (const void *e1, const void *e2)
4981 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4982 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4984 return elt1->stmt == elt2->stmt;
4987 /* Free the element E. */
4990 hash_stmt_vertex_del (void *e)
4995 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4996 statement of the loop nest, and one edge per data dependence or
4997 scalar dependence. */
5000 build_empty_rdg (int n_stmts)
5002 int nb_data_refs = 10;
5003 struct graph *rdg = new_graph (n_stmts);
5005 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5006 eq_stmt_vertex_info, hash_stmt_vertex_del);
5010 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5011 statement of the loop nest, and one edge per data dependence or
5012 scalar dependence. */
5015 build_rdg (struct loop *loop,
5016 VEC (loop_p, heap) **loop_nest,
5017 VEC (ddr_p, heap) **dependence_relations,
5018 VEC (data_reference_p, heap) **datarefs)
5020 struct graph *rdg = NULL;
5021 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5023 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5024 dependence_relations);
5026 if (known_dependences_p (*dependence_relations))
5028 stmts_from_loop (loop, &stmts);
5029 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5030 create_rdg_vertices (rdg, stmts);
5031 create_rdg_edges (rdg, *dependence_relations);
5034 VEC_free (gimple, heap, stmts);
5038 /* Free the reduced dependence graph RDG. */
5041 free_rdg (struct graph *rdg)
5045 for (i = 0; i < rdg->n_vertices; i++)
5047 struct vertex *v = &(rdg->vertices[i]);
5048 struct graph_edge *e;
5050 for (e = v->succ; e; e = e->succ_next)
5056 htab_delete (rdg->indices);
5060 /* Initialize STMTS with all the statements of LOOP that contain a
5064 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5067 basic_block *bbs = get_loop_body_in_dom_order (loop);
5069 for (i = 0; i < loop->num_nodes; i++)
5071 basic_block bb = bbs[i];
5072 gimple_stmt_iterator bsi;
5074 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5075 if (gimple_vdef (gsi_stmt (bsi)))
5076 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5082 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5083 that contains a data reference on its LHS with a stride of the same
5084 size as its unit type. */
5087 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5091 struct data_reference *dr;
5094 || !gimple_vdef (stmt)
5095 || !is_gimple_assign (stmt)
5096 || !gimple_assign_single_p (stmt)
5097 || !(op1 = gimple_assign_rhs1 (stmt))
5098 || !(integer_zerop (op1) || real_zerop (op1)))
5101 dr = XCNEW (struct data_reference);
5102 op0 = gimple_assign_lhs (stmt);
5104 DR_STMT (dr) = stmt;
5107 res = dr_analyze_innermost (dr)
5108 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5114 /* Initialize STMTS with all the statements of LOOP that contain a
5115 store to memory of the form "A[i] = 0". */
5118 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5122 gimple_stmt_iterator si;
5124 basic_block *bbs = get_loop_body_in_dom_order (loop);
5126 for (i = 0; i < loop->num_nodes; i++)
5127 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5128 if ((stmt = gsi_stmt (si))
5129 && stmt_with_adjacent_zero_store_dr_p (stmt))
5130 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5135 /* For a data reference REF, return the declaration of its base
5136 address or NULL_TREE if the base is not determined. */
5139 ref_base_address (gimple stmt, data_ref_loc *ref)
5141 tree base = NULL_TREE;
5143 struct data_reference *dr = XCNEW (struct data_reference);
5145 DR_STMT (dr) = stmt;
5146 DR_REF (dr) = *ref->pos;
5147 dr_analyze_innermost (dr);
5148 base_address = DR_BASE_ADDRESS (dr);
5153 switch (TREE_CODE (base_address))
5156 base = TREE_OPERAND (base_address, 0);
5160 base = base_address;
5169 /* Determines whether the statement from vertex V of the RDG has a
5170 definition used outside the loop that contains this statement. */
5173 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5175 gimple stmt = RDG_STMT (rdg, v);
5176 struct loop *loop = loop_containing_stmt (stmt);
5177 use_operand_p imm_use_p;
5178 imm_use_iterator iterator;
5180 def_operand_p def_p;
5185 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5187 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5189 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5197 /* Determines whether statements S1 and S2 access to similar memory
5198 locations. Two memory accesses are considered similar when they
5199 have the same base address declaration, i.e. when their
5200 ref_base_address is the same. */
5203 have_similar_memory_accesses (gimple s1, gimple s2)
5207 VEC (data_ref_loc, heap) *refs1, *refs2;
5208 data_ref_loc *ref1, *ref2;
5210 get_references_in_stmt (s1, &refs1);
5211 get_references_in_stmt (s2, &refs2);
5213 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5215 tree base1 = ref_base_address (s1, ref1);
5218 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5219 if (base1 == ref_base_address (s2, ref2))
5227 VEC_free (data_ref_loc, heap, refs1);
5228 VEC_free (data_ref_loc, heap, refs2);
5232 /* Helper function for the hashtab. */
5235 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5237 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5238 CONST_CAST_GIMPLE ((const_gimple) s2));
5241 /* Helper function for the hashtab. */
5244 ref_base_address_1 (const void *s)
5246 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5248 VEC (data_ref_loc, heap) *refs;
5252 get_references_in_stmt (stmt, &refs);
5254 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5257 res = htab_hash_pointer (ref_base_address (stmt, ref));
5261 VEC_free (data_ref_loc, heap, refs);
5265 /* Try to remove duplicated write data references from STMTS. */
5268 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5272 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5273 have_similar_memory_accesses_1, NULL);
5275 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5279 slot = htab_find_slot (seen, stmt, INSERT);
5282 VEC_ordered_remove (gimple, *stmts, i);
5285 *slot = (void *) stmt;
5293 /* Returns the index of PARAMETER in the parameters vector of the
5294 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5297 access_matrix_get_index_for_parameter (tree parameter,
5298 struct access_matrix *access_matrix)
5301 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5302 tree lambda_parameter;
5304 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5305 if (lambda_parameter == parameter)
5306 return i + AM_NB_INDUCTION_VARS (access_matrix);