1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
32 The goals of this analysis are:
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
47 - to define a knowledge base for storing the data dependence
50 - to define an interface to access this data.
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
63 has an integer solution x = 1 and y = -1.
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
79 #include "coretypes.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-flow.h"
83 #include "tree-data-ref.h"
84 #include "tree-scalar-evolution.h"
85 #include "tree-pass.h"
86 #include "langhooks.h"
88 static struct datadep_stats
90 int num_dependence_tests;
91 int num_dependence_dependent;
92 int num_dependence_independent;
93 int num_dependence_undetermined;
95 int num_subscript_tests;
96 int num_subscript_undetermined;
97 int num_same_subscript_function;
100 int num_ziv_independent;
101 int num_ziv_dependent;
102 int num_ziv_unimplemented;
105 int num_siv_independent;
106 int num_siv_dependent;
107 int num_siv_unimplemented;
110 int num_miv_independent;
111 int num_miv_dependent;
112 int num_miv_unimplemented;
115 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
116 struct data_reference *,
117 struct data_reference *,
119 /* Returns true iff A divides B. */
122 tree_fold_divides_p (const_tree a, const_tree b)
124 gcc_assert (TREE_CODE (a) == INTEGER_CST);
125 gcc_assert (TREE_CODE (b) == INTEGER_CST);
126 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
129 /* Returns true iff A divides B. */
132 int_divides_p (int a, int b)
134 return ((b % a) == 0);
139 /* Dump into FILE all the data references from DATAREFS. */
142 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
145 struct data_reference *dr;
147 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
148 dump_data_reference (file, dr);
151 /* Dump into STDERR all the data references from DATAREFS. */
154 debug_data_references (VEC (data_reference_p, heap) *datarefs)
156 dump_data_references (stderr, datarefs);
159 /* Dump to STDERR all the dependence relations from DDRS. */
162 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 dump_data_dependence_relations (stderr, ddrs);
167 /* Dump into FILE all the dependence relations from DDRS. */
170 dump_data_dependence_relations (FILE *file,
171 VEC (ddr_p, heap) *ddrs)
174 struct data_dependence_relation *ddr;
176 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
177 dump_data_dependence_relation (file, ddr);
180 /* Print to STDERR the data_reference DR. */
183 debug_data_reference (struct data_reference *dr)
185 dump_data_reference (stderr, dr);
188 /* Dump function for a DATA_REFERENCE structure. */
191 dump_data_reference (FILE *outf,
192 struct data_reference *dr)
196 fprintf (outf, "#(Data Ref: \n");
197 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index);
198 fprintf (outf, "# stmt: ");
199 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
200 fprintf (outf, "# ref: ");
201 print_generic_stmt (outf, DR_REF (dr), 0);
202 fprintf (outf, "# base_object: ");
203 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
205 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
207 fprintf (outf, "# Access function %d: ", i);
208 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
210 fprintf (outf, "#)\n");
213 /* Dumps the affine function described by FN to the file OUTF. */
216 dump_affine_function (FILE *outf, affine_fn fn)
221 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
222 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
224 fprintf (outf, " + ");
225 print_generic_expr (outf, coef, TDF_SLIM);
226 fprintf (outf, " * x_%u", i);
230 /* Dumps the conflict function CF to the file OUTF. */
233 dump_conflict_function (FILE *outf, conflict_function *cf)
237 if (cf->n == NO_DEPENDENCE)
238 fprintf (outf, "no dependence\n");
239 else if (cf->n == NOT_KNOWN)
240 fprintf (outf, "not known\n");
243 for (i = 0; i < cf->n; i++)
246 dump_affine_function (outf, cf->fns[i]);
247 fprintf (outf, "]\n");
252 /* Dump function for a SUBSCRIPT structure. */
255 dump_subscript (FILE *outf, struct subscript *subscript)
257 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
259 fprintf (outf, "\n (subscript \n");
260 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
261 dump_conflict_function (outf, cf);
262 if (CF_NONTRIVIAL_P (cf))
264 tree last_iteration = SUB_LAST_CONFLICT (subscript);
265 fprintf (outf, " last_conflict: ");
266 print_generic_stmt (outf, last_iteration, 0);
269 cf = SUB_CONFLICTS_IN_B (subscript);
270 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
271 dump_conflict_function (outf, cf);
272 if (CF_NONTRIVIAL_P (cf))
274 tree last_iteration = SUB_LAST_CONFLICT (subscript);
275 fprintf (outf, " last_conflict: ");
276 print_generic_stmt (outf, last_iteration, 0);
279 fprintf (outf, " (Subscript distance: ");
280 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
281 fprintf (outf, " )\n");
282 fprintf (outf, " )\n");
285 /* Print the classic direction vector DIRV to OUTF. */
288 print_direction_vector (FILE *outf,
294 for (eq = 0; eq < length; eq++)
296 enum data_dependence_direction dir = ((enum data_dependence_direction)
302 fprintf (outf, " +");
305 fprintf (outf, " -");
308 fprintf (outf, " =");
310 case dir_positive_or_equal:
311 fprintf (outf, " +=");
313 case dir_positive_or_negative:
314 fprintf (outf, " +-");
316 case dir_negative_or_equal:
317 fprintf (outf, " -=");
320 fprintf (outf, " *");
323 fprintf (outf, "indep");
327 fprintf (outf, "\n");
330 /* Print a vector of direction vectors. */
333 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
339 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
340 print_direction_vector (outf, v, length);
343 /* Print out a vector VEC of length N to OUTFILE. */
346 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
350 for (i = 0; i < n; i++)
351 fprintf (outfile, "%3d ", vector[i]);
352 fprintf (outfile, "\n");
355 /* Print a vector of distance vectors. */
358 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
364 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
365 print_lambda_vector (outf, v, length);
371 debug_data_dependence_relation (struct data_dependence_relation *ddr)
373 dump_data_dependence_relation (stderr, ddr);
376 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
379 dump_data_dependence_relation (FILE *outf,
380 struct data_dependence_relation *ddr)
382 struct data_reference *dra, *drb;
384 fprintf (outf, "(Data Dep: \n");
386 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
393 dump_data_reference (outf, dra);
395 fprintf (outf, " (nil)\n");
397 dump_data_reference (outf, drb);
399 fprintf (outf, " (nil)\n");
401 fprintf (outf, " (don't know)\n)\n");
407 dump_data_reference (outf, dra);
408 dump_data_reference (outf, drb);
410 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
411 fprintf (outf, " (no dependence)\n");
413 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
418 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
420 fprintf (outf, " access_fn_A: ");
421 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
422 fprintf (outf, " access_fn_B: ");
423 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
424 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
427 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
428 fprintf (outf, " loop nest: (");
429 FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
430 fprintf (outf, "%d ", loopi->num);
431 fprintf (outf, ")\n");
433 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
435 fprintf (outf, " distance_vector: ");
436 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
440 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
442 fprintf (outf, " direction_vector: ");
443 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
448 fprintf (outf, ")\n");
451 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
454 dump_data_dependence_direction (FILE *file,
455 enum data_dependence_direction dir)
471 case dir_positive_or_negative:
472 fprintf (file, "+-");
475 case dir_positive_or_equal:
476 fprintf (file, "+=");
479 case dir_negative_or_equal:
480 fprintf (file, "-=");
492 /* Dumps the distance and direction vectors in FILE. DDRS contains
493 the dependence relations, and VECT_SIZE is the size of the
494 dependence vectors, or in other words the number of loops in the
498 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
501 struct data_dependence_relation *ddr;
504 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
505 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
507 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
509 fprintf (file, "DISTANCE_V (");
510 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
511 fprintf (file, ")\n");
514 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
516 fprintf (file, "DIRECTION_V (");
517 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
518 fprintf (file, ")\n");
522 fprintf (file, "\n\n");
525 /* Dumps the data dependence relations DDRS in FILE. */
528 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
531 struct data_dependence_relation *ddr;
533 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
534 dump_data_dependence_relation (file, ddr);
536 fprintf (file, "\n\n");
539 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
540 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
541 constant of type ssizetype, and returns true. If we cannot do this
542 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
546 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
547 tree *var, tree *off)
551 enum tree_code ocode = code;
559 *var = build_int_cst (type, 0);
560 *off = fold_convert (ssizetype, op0);
563 case POINTER_PLUS_EXPR:
568 split_constant_offset (op0, &var0, &off0);
569 split_constant_offset (op1, &var1, &off1);
570 *var = fold_build2 (code, type, var0, var1);
571 *off = size_binop (ocode, off0, off1);
575 if (TREE_CODE (op1) != INTEGER_CST)
578 split_constant_offset (op0, &var0, &off0);
579 *var = fold_build2 (MULT_EXPR, type, var0, op1);
580 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
586 HOST_WIDE_INT pbitsize, pbitpos;
587 enum machine_mode pmode;
588 int punsignedp, pvolatilep;
590 op0 = TREE_OPERAND (op0, 0);
591 if (!handled_component_p (op0))
594 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
595 &pmode, &punsignedp, &pvolatilep, false);
597 if (pbitpos % BITS_PER_UNIT != 0)
599 base = build_fold_addr_expr (base);
600 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
604 split_constant_offset (poffset, &poffset, &off1);
605 off0 = size_binop (PLUS_EXPR, off0, off1);
606 if (POINTER_TYPE_P (TREE_TYPE (base)))
607 base = fold_build_pointer_plus (base, poffset);
609 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
610 fold_convert (TREE_TYPE (base), poffset));
613 var0 = fold_convert (type, base);
615 /* If variable length types are involved, punt, otherwise casts
616 might be converted into ARRAY_REFs in gimplify_conversion.
617 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
618 possibly no longer appears in current GIMPLE, might resurface.
619 This perhaps could run
620 if (CONVERT_EXPR_P (var0))
622 gimplify_conversion (&var0);
623 // Attempt to fill in any within var0 found ARRAY_REF's
624 // element size from corresponding op embedded ARRAY_REF,
625 // if unsuccessful, just punt.
627 while (POINTER_TYPE_P (type))
628 type = TREE_TYPE (type);
629 if (int_size_in_bytes (type) < 0)
639 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
640 enum tree_code subcode;
642 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
645 var0 = gimple_assign_rhs1 (def_stmt);
646 subcode = gimple_assign_rhs_code (def_stmt);
647 var1 = gimple_assign_rhs2 (def_stmt);
649 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
653 /* We must not introduce undefined overflow, and we must not change the value.
654 Hence we're okay if the inner type doesn't overflow to start with
655 (pointer or signed), the outer type also is an integer or pointer
656 and the outer precision is at least as large as the inner. */
657 tree itype = TREE_TYPE (op0);
658 if ((POINTER_TYPE_P (itype)
659 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
660 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
661 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
663 split_constant_offset (op0, &var0, off);
664 *var = fold_convert (type, var0);
675 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
676 will be ssizetype. */
679 split_constant_offset (tree exp, tree *var, tree *off)
681 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
685 *off = ssize_int (0);
688 if (tree_is_chrec (exp))
691 otype = TREE_TYPE (exp);
692 code = TREE_CODE (exp);
693 extract_ops_from_tree (exp, &code, &op0, &op1);
694 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
696 *var = fold_convert (type, e);
701 /* Returns the address ADDR of an object in a canonical shape (without nop
702 casts, and with type of pointer to the object). */
705 canonicalize_base_object_address (tree addr)
711 /* The base address may be obtained by casting from integer, in that case
713 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
716 if (TREE_CODE (addr) != ADDR_EXPR)
719 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
722 /* Analyzes the behavior of the memory reference DR in the innermost loop or
723 basic block that contains it. Returns true if analysis succeed or false
727 dr_analyze_innermost (struct data_reference *dr)
729 gimple stmt = DR_STMT (dr);
730 struct loop *loop = loop_containing_stmt (stmt);
731 tree ref = DR_REF (dr);
732 HOST_WIDE_INT pbitsize, pbitpos;
734 enum machine_mode pmode;
735 int punsignedp, pvolatilep;
736 affine_iv base_iv, offset_iv;
737 tree init, dinit, step;
738 bool in_loop = (loop && loop->num);
740 if (dump_file && (dump_flags & TDF_DETAILS))
741 fprintf (dump_file, "analyze_innermost: ");
743 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
744 &pmode, &punsignedp, &pvolatilep, false);
745 gcc_assert (base != NULL_TREE);
747 if (pbitpos % BITS_PER_UNIT != 0)
749 if (dump_file && (dump_flags & TDF_DETAILS))
750 fprintf (dump_file, "failed: bit offset alignment.\n");
754 if (TREE_CODE (base) == MEM_REF)
756 if (!integer_zerop (TREE_OPERAND (base, 1)))
760 double_int moff = mem_ref_offset (base);
761 poffset = double_int_to_tree (sizetype, moff);
764 poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
766 base = TREE_OPERAND (base, 0);
769 base = build_fold_addr_expr (base);
772 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
775 if (dump_file && (dump_flags & TDF_DETAILS))
776 fprintf (dump_file, "failed: evolution of base is not affine.\n");
783 base_iv.step = ssize_int (0);
784 base_iv.no_overflow = true;
789 offset_iv.base = ssize_int (0);
790 offset_iv.step = ssize_int (0);
796 offset_iv.base = poffset;
797 offset_iv.step = ssize_int (0);
799 else if (!simple_iv (loop, loop_containing_stmt (stmt),
800 poffset, &offset_iv, false))
802 if (dump_file && (dump_flags & TDF_DETAILS))
803 fprintf (dump_file, "failed: evolution of offset is not"
809 init = ssize_int (pbitpos / BITS_PER_UNIT);
810 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
811 init = size_binop (PLUS_EXPR, init, dinit);
812 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
813 init = size_binop (PLUS_EXPR, init, dinit);
815 step = size_binop (PLUS_EXPR,
816 fold_convert (ssizetype, base_iv.step),
817 fold_convert (ssizetype, offset_iv.step));
819 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
821 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
825 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
827 if (dump_file && (dump_flags & TDF_DETAILS))
828 fprintf (dump_file, "success.\n");
833 /* Determines the base object and the list of indices of memory reference
834 DR, analyzed in LOOP and instantiated in loop nest NEST. */
837 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
839 VEC (tree, heap) *access_fns = NULL;
840 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
841 tree base, off, access_fn = NULL_TREE;
842 basic_block before_loop = NULL;
845 before_loop = block_before_loop (nest);
847 while (handled_component_p (aref))
849 if (TREE_CODE (aref) == ARRAY_REF)
851 op = TREE_OPERAND (aref, 1);
854 access_fn = analyze_scalar_evolution (loop, op);
855 access_fn = instantiate_scev (before_loop, loop, access_fn);
856 VEC_safe_push (tree, heap, access_fns, access_fn);
859 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
862 aref = TREE_OPERAND (aref, 0);
866 && (INDIRECT_REF_P (aref)
867 || TREE_CODE (aref) == MEM_REF))
869 op = TREE_OPERAND (aref, 0);
870 access_fn = analyze_scalar_evolution (loop, op);
871 access_fn = instantiate_scev (before_loop, loop, access_fn);
872 base = initial_condition (access_fn);
873 split_constant_offset (base, &base, &off);
874 if (TREE_CODE (aref) == MEM_REF)
875 off = size_binop (PLUS_EXPR, off,
876 fold_convert (ssizetype, TREE_OPERAND (aref, 1)));
877 access_fn = chrec_replace_initial_condition (access_fn,
878 fold_convert (TREE_TYPE (base), off));
880 TREE_OPERAND (aref, 0) = base;
881 VEC_safe_push (tree, heap, access_fns, access_fn);
884 if (TREE_CODE (aref) == MEM_REF)
885 TREE_OPERAND (aref, 1)
886 = build_int_cst (TREE_TYPE (TREE_OPERAND (aref, 1)), 0);
888 if (TREE_CODE (ref) == MEM_REF
889 && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
890 && integer_zerop (TREE_OPERAND (ref, 1)))
891 ref = TREE_OPERAND (TREE_OPERAND (ref, 0), 0);
893 /* For canonicalization purposes we'd like to strip all outermost
894 zero-offset component-refs.
895 ??? For now simply handle zero-index array-refs. */
896 while (TREE_CODE (ref) == ARRAY_REF
897 && integer_zerop (TREE_OPERAND (ref, 1)))
898 ref = TREE_OPERAND (ref, 0);
900 DR_BASE_OBJECT (dr) = ref;
901 DR_ACCESS_FNS (dr) = access_fns;
904 /* Extracts the alias analysis information from the memory reference DR. */
907 dr_analyze_alias (struct data_reference *dr)
909 tree ref = DR_REF (dr);
910 tree base = get_base_address (ref), addr;
912 if (INDIRECT_REF_P (base)
913 || TREE_CODE (base) == MEM_REF)
915 addr = TREE_OPERAND (base, 0);
916 if (TREE_CODE (addr) == SSA_NAME)
917 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
921 /* Frees data reference DR. */
924 free_data_ref (data_reference_p dr)
926 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
930 /* Analyzes memory reference MEMREF accessed in STMT. The reference
931 is read if IS_READ is true, write otherwise. Returns the
932 data_reference description of MEMREF. NEST is the outermost loop
933 in which the reference should be instantiated, LOOP is the loop in
934 which the data reference should be analyzed. */
936 struct data_reference *
937 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
940 struct data_reference *dr;
942 if (dump_file && (dump_flags & TDF_DETAILS))
944 fprintf (dump_file, "Creating dr for ");
945 print_generic_expr (dump_file, memref, TDF_SLIM);
946 fprintf (dump_file, "\n");
949 dr = XCNEW (struct data_reference);
951 DR_REF (dr) = memref;
952 DR_IS_READ (dr) = is_read;
954 dr_analyze_innermost (dr);
955 dr_analyze_indices (dr, nest, loop);
956 dr_analyze_alias (dr);
958 if (dump_file && (dump_flags & TDF_DETAILS))
960 fprintf (dump_file, "\tbase_address: ");
961 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
962 fprintf (dump_file, "\n\toffset from base address: ");
963 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
964 fprintf (dump_file, "\n\tconstant offset from base address: ");
965 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
966 fprintf (dump_file, "\n\tstep: ");
967 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
968 fprintf (dump_file, "\n\taligned to: ");
969 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
970 fprintf (dump_file, "\n\tbase_object: ");
971 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
972 fprintf (dump_file, "\n");
978 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
981 dr_equal_offsets_p1 (tree offset1, tree offset2)
985 STRIP_NOPS (offset1);
986 STRIP_NOPS (offset2);
988 if (offset1 == offset2)
991 if (TREE_CODE (offset1) != TREE_CODE (offset2)
992 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
995 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
996 TREE_OPERAND (offset2, 0));
998 if (!res || !BINARY_CLASS_P (offset1))
1001 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1002 TREE_OPERAND (offset2, 1));
1007 /* Check if DRA and DRB have equal offsets. */
1009 dr_equal_offsets_p (struct data_reference *dra,
1010 struct data_reference *drb)
1012 tree offset1, offset2;
1014 offset1 = DR_OFFSET (dra);
1015 offset2 = DR_OFFSET (drb);
1017 return dr_equal_offsets_p1 (offset1, offset2);
1020 /* Returns true if FNA == FNB. */
1023 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1025 unsigned i, n = VEC_length (tree, fna);
1027 if (n != VEC_length (tree, fnb))
1030 for (i = 0; i < n; i++)
1031 if (!operand_equal_p (VEC_index (tree, fna, i),
1032 VEC_index (tree, fnb, i), 0))
1038 /* If all the functions in CF are the same, returns one of them,
1039 otherwise returns NULL. */
1042 common_affine_function (conflict_function *cf)
1047 if (!CF_NONTRIVIAL_P (cf))
1052 for (i = 1; i < cf->n; i++)
1053 if (!affine_function_equal_p (comm, cf->fns[i]))
1059 /* Returns the base of the affine function FN. */
1062 affine_function_base (affine_fn fn)
1064 return VEC_index (tree, fn, 0);
1067 /* Returns true if FN is a constant. */
1070 affine_function_constant_p (affine_fn fn)
1075 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1076 if (!integer_zerop (coef))
1082 /* Returns true if FN is the zero constant function. */
1085 affine_function_zero_p (affine_fn fn)
1087 return (integer_zerop (affine_function_base (fn))
1088 && affine_function_constant_p (fn));
1091 /* Returns a signed integer type with the largest precision from TA
1095 signed_type_for_types (tree ta, tree tb)
1097 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1098 return signed_type_for (ta);
1100 return signed_type_for (tb);
1103 /* Applies operation OP on affine functions FNA and FNB, and returns the
1107 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1113 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1115 n = VEC_length (tree, fna);
1116 m = VEC_length (tree, fnb);
1120 n = VEC_length (tree, fnb);
1121 m = VEC_length (tree, fna);
1124 ret = VEC_alloc (tree, heap, m);
1125 for (i = 0; i < n; i++)
1127 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1128 TREE_TYPE (VEC_index (tree, fnb, i)));
1130 VEC_quick_push (tree, ret,
1131 fold_build2 (op, type,
1132 VEC_index (tree, fna, i),
1133 VEC_index (tree, fnb, i)));
1136 for (; VEC_iterate (tree, fna, i, coef); i++)
1137 VEC_quick_push (tree, ret,
1138 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1139 coef, integer_zero_node));
1140 for (; VEC_iterate (tree, fnb, i, coef); i++)
1141 VEC_quick_push (tree, ret,
1142 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1143 integer_zero_node, coef));
1148 /* Returns the sum of affine functions FNA and FNB. */
1151 affine_fn_plus (affine_fn fna, affine_fn fnb)
1153 return affine_fn_op (PLUS_EXPR, fna, fnb);
1156 /* Returns the difference of affine functions FNA and FNB. */
1159 affine_fn_minus (affine_fn fna, affine_fn fnb)
1161 return affine_fn_op (MINUS_EXPR, fna, fnb);
1164 /* Frees affine function FN. */
1167 affine_fn_free (affine_fn fn)
1169 VEC_free (tree, heap, fn);
1172 /* Determine for each subscript in the data dependence relation DDR
1176 compute_subscript_distance (struct data_dependence_relation *ddr)
1178 conflict_function *cf_a, *cf_b;
1179 affine_fn fn_a, fn_b, diff;
1181 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1185 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1187 struct subscript *subscript;
1189 subscript = DDR_SUBSCRIPT (ddr, i);
1190 cf_a = SUB_CONFLICTS_IN_A (subscript);
1191 cf_b = SUB_CONFLICTS_IN_B (subscript);
1193 fn_a = common_affine_function (cf_a);
1194 fn_b = common_affine_function (cf_b);
1197 SUB_DISTANCE (subscript) = chrec_dont_know;
1200 diff = affine_fn_minus (fn_a, fn_b);
1202 if (affine_function_constant_p (diff))
1203 SUB_DISTANCE (subscript) = affine_function_base (diff);
1205 SUB_DISTANCE (subscript) = chrec_dont_know;
1207 affine_fn_free (diff);
1212 /* Returns the conflict function for "unknown". */
1214 static conflict_function *
1215 conflict_fn_not_known (void)
1217 conflict_function *fn = XCNEW (conflict_function);
1223 /* Returns the conflict function for "independent". */
1225 static conflict_function *
1226 conflict_fn_no_dependence (void)
1228 conflict_function *fn = XCNEW (conflict_function);
1229 fn->n = NO_DEPENDENCE;
1234 /* Returns true if the address of OBJ is invariant in LOOP. */
1237 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1239 while (handled_component_p (obj))
1241 if (TREE_CODE (obj) == ARRAY_REF)
1243 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1244 need to check the stride and the lower bound of the reference. */
1245 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1247 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1251 else if (TREE_CODE (obj) == COMPONENT_REF)
1253 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1257 obj = TREE_OPERAND (obj, 0);
1260 if (!INDIRECT_REF_P (obj)
1261 && TREE_CODE (obj) != MEM_REF)
1264 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1268 /* Returns false if we can prove that data references A and B do not alias,
1272 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1274 tree addr_a = DR_BASE_OBJECT (a);
1275 tree addr_b = DR_BASE_OBJECT (b);
1277 if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1278 return refs_output_dependent_p (addr_a, addr_b);
1279 else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1280 return refs_anti_dependent_p (addr_a, addr_b);
1281 return refs_may_alias_p (addr_a, addr_b);
1284 static void compute_self_dependence (struct data_dependence_relation *);
1286 /* Initialize a data dependence relation between data accesses A and
1287 B. NB_LOOPS is the number of loops surrounding the references: the
1288 size of the classic distance/direction vectors. */
1290 static struct data_dependence_relation *
1291 initialize_data_dependence_relation (struct data_reference *a,
1292 struct data_reference *b,
1293 VEC (loop_p, heap) *loop_nest)
1295 struct data_dependence_relation *res;
1298 res = XNEW (struct data_dependence_relation);
1301 DDR_LOOP_NEST (res) = NULL;
1302 DDR_REVERSED_P (res) = false;
1303 DDR_SUBSCRIPTS (res) = NULL;
1304 DDR_DIR_VECTS (res) = NULL;
1305 DDR_DIST_VECTS (res) = NULL;
1307 if (a == NULL || b == NULL)
1309 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1313 /* If the data references do not alias, then they are independent. */
1314 if (!dr_may_alias_p (a, b))
1316 DDR_ARE_DEPENDENT (res) = chrec_known;
1320 /* When the references are exactly the same, don't spend time doing
1321 the data dependence tests, just initialize the ddr and return. */
1322 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1324 DDR_AFFINE_P (res) = true;
1325 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1326 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1327 DDR_LOOP_NEST (res) = loop_nest;
1328 DDR_INNER_LOOP (res) = 0;
1329 DDR_SELF_REFERENCE (res) = true;
1330 compute_self_dependence (res);
1334 /* If the references do not access the same object, we do not know
1335 whether they alias or not. */
1336 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1338 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1342 /* If the base of the object is not invariant in the loop nest, we cannot
1343 analyze it. TODO -- in fact, it would suffice to record that there may
1344 be arbitrary dependences in the loops where the base object varies. */
1346 && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1347 DR_BASE_OBJECT (a)))
1349 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1353 /* If the number of dimensions of the access to not agree we can have
1354 a pointer access to a component of the array element type and an
1355 array access while the base-objects are still the same. Punt. */
1356 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1358 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1362 DDR_AFFINE_P (res) = true;
1363 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1364 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1365 DDR_LOOP_NEST (res) = loop_nest;
1366 DDR_INNER_LOOP (res) = 0;
1367 DDR_SELF_REFERENCE (res) = false;
1369 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1371 struct subscript *subscript;
1373 subscript = XNEW (struct subscript);
1374 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1375 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1376 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1377 SUB_DISTANCE (subscript) = chrec_dont_know;
1378 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1384 /* Frees memory used by the conflict function F. */
1387 free_conflict_function (conflict_function *f)
1391 if (CF_NONTRIVIAL_P (f))
1393 for (i = 0; i < f->n; i++)
1394 affine_fn_free (f->fns[i]);
1399 /* Frees memory used by SUBSCRIPTS. */
1402 free_subscripts (VEC (subscript_p, heap) *subscripts)
1407 FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1409 free_conflict_function (s->conflicting_iterations_in_a);
1410 free_conflict_function (s->conflicting_iterations_in_b);
1413 VEC_free (subscript_p, heap, subscripts);
1416 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1420 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1423 if (dump_file && (dump_flags & TDF_DETAILS))
1425 fprintf (dump_file, "(dependence classified: ");
1426 print_generic_expr (dump_file, chrec, 0);
1427 fprintf (dump_file, ")\n");
1430 DDR_ARE_DEPENDENT (ddr) = chrec;
1431 free_subscripts (DDR_SUBSCRIPTS (ddr));
1432 DDR_SUBSCRIPTS (ddr) = NULL;
1435 /* The dependence relation DDR cannot be represented by a distance
1439 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1441 if (dump_file && (dump_flags & TDF_DETAILS))
1442 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1444 DDR_AFFINE_P (ddr) = false;
1449 /* This section contains the classic Banerjee tests. */
1451 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1452 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1455 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1457 return (evolution_function_is_constant_p (chrec_a)
1458 && evolution_function_is_constant_p (chrec_b));
1461 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1462 variable, i.e., if the SIV (Single Index Variable) test is true. */
1465 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1467 if ((evolution_function_is_constant_p (chrec_a)
1468 && evolution_function_is_univariate_p (chrec_b))
1469 || (evolution_function_is_constant_p (chrec_b)
1470 && evolution_function_is_univariate_p (chrec_a)))
1473 if (evolution_function_is_univariate_p (chrec_a)
1474 && evolution_function_is_univariate_p (chrec_b))
1476 switch (TREE_CODE (chrec_a))
1478 case POLYNOMIAL_CHREC:
1479 switch (TREE_CODE (chrec_b))
1481 case POLYNOMIAL_CHREC:
1482 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1497 /* Creates a conflict function with N dimensions. The affine functions
1498 in each dimension follow. */
1500 static conflict_function *
1501 conflict_fn (unsigned n, ...)
1504 conflict_function *ret = XCNEW (conflict_function);
1507 gcc_assert (0 < n && n <= MAX_DIM);
1511 for (i = 0; i < n; i++)
1512 ret->fns[i] = va_arg (ap, affine_fn);
1518 /* Returns constant affine function with value CST. */
1521 affine_fn_cst (tree cst)
1523 affine_fn fn = VEC_alloc (tree, heap, 1);
1524 VEC_quick_push (tree, fn, cst);
1528 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1531 affine_fn_univar (tree cst, unsigned dim, tree coef)
1533 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1536 gcc_assert (dim > 0);
1537 VEC_quick_push (tree, fn, cst);
1538 for (i = 1; i < dim; i++)
1539 VEC_quick_push (tree, fn, integer_zero_node);
1540 VEC_quick_push (tree, fn, coef);
1544 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1545 *OVERLAPS_B are initialized to the functions that describe the
1546 relation between the elements accessed twice by CHREC_A and
1547 CHREC_B. For k >= 0, the following property is verified:
1549 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1552 analyze_ziv_subscript (tree chrec_a,
1554 conflict_function **overlaps_a,
1555 conflict_function **overlaps_b,
1556 tree *last_conflicts)
1558 tree type, difference;
1559 dependence_stats.num_ziv++;
1561 if (dump_file && (dump_flags & TDF_DETAILS))
1562 fprintf (dump_file, "(analyze_ziv_subscript \n");
1564 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1565 chrec_a = chrec_convert (type, chrec_a, NULL);
1566 chrec_b = chrec_convert (type, chrec_b, NULL);
1567 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1569 switch (TREE_CODE (difference))
1572 if (integer_zerop (difference))
1574 /* The difference is equal to zero: the accessed index
1575 overlaps for each iteration in the loop. */
1576 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1577 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1578 *last_conflicts = chrec_dont_know;
1579 dependence_stats.num_ziv_dependent++;
1583 /* The accesses do not overlap. */
1584 *overlaps_a = conflict_fn_no_dependence ();
1585 *overlaps_b = conflict_fn_no_dependence ();
1586 *last_conflicts = integer_zero_node;
1587 dependence_stats.num_ziv_independent++;
1592 /* We're not sure whether the indexes overlap. For the moment,
1593 conservatively answer "don't know". */
1594 if (dump_file && (dump_flags & TDF_DETAILS))
1595 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1597 *overlaps_a = conflict_fn_not_known ();
1598 *overlaps_b = conflict_fn_not_known ();
1599 *last_conflicts = chrec_dont_know;
1600 dependence_stats.num_ziv_unimplemented++;
1604 if (dump_file && (dump_flags & TDF_DETAILS))
1605 fprintf (dump_file, ")\n");
1608 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1609 and only if it fits to the int type. If this is not the case, or the
1610 bound on the number of iterations of LOOP could not be derived, returns
1614 max_stmt_executions_tree (struct loop *loop)
1619 if (!max_stmt_executions (loop, true, &nit))
1620 return chrec_dont_know;
1622 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1623 if (!double_int_fits_to_tree_p (type, nit))
1624 return chrec_dont_know;
1626 return double_int_to_tree (type, nit);
1629 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1630 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1631 *OVERLAPS_B are initialized to the functions that describe the
1632 relation between the elements accessed twice by CHREC_A and
1633 CHREC_B. For k >= 0, the following property is verified:
1635 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1638 analyze_siv_subscript_cst_affine (tree chrec_a,
1640 conflict_function **overlaps_a,
1641 conflict_function **overlaps_b,
1642 tree *last_conflicts)
1644 bool value0, value1, value2;
1645 tree type, difference, tmp;
1647 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1648 chrec_a = chrec_convert (type, chrec_a, NULL);
1649 chrec_b = chrec_convert (type, chrec_b, NULL);
1650 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1652 if (!chrec_is_positive (initial_condition (difference), &value0))
1654 if (dump_file && (dump_flags & TDF_DETAILS))
1655 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1657 dependence_stats.num_siv_unimplemented++;
1658 *overlaps_a = conflict_fn_not_known ();
1659 *overlaps_b = conflict_fn_not_known ();
1660 *last_conflicts = chrec_dont_know;
1665 if (value0 == false)
1667 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1669 if (dump_file && (dump_flags & TDF_DETAILS))
1670 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1672 *overlaps_a = conflict_fn_not_known ();
1673 *overlaps_b = conflict_fn_not_known ();
1674 *last_conflicts = chrec_dont_know;
1675 dependence_stats.num_siv_unimplemented++;
1684 chrec_b = {10, +, 1}
1687 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1689 HOST_WIDE_INT numiter;
1690 struct loop *loop = get_chrec_loop (chrec_b);
1692 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1693 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1694 fold_build1 (ABS_EXPR, type, difference),
1695 CHREC_RIGHT (chrec_b));
1696 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1697 *last_conflicts = integer_one_node;
1700 /* Perform weak-zero siv test to see if overlap is
1701 outside the loop bounds. */
1702 numiter = max_stmt_executions_int (loop, true);
1705 && compare_tree_int (tmp, numiter) > 0)
1707 free_conflict_function (*overlaps_a);
1708 free_conflict_function (*overlaps_b);
1709 *overlaps_a = conflict_fn_no_dependence ();
1710 *overlaps_b = conflict_fn_no_dependence ();
1711 *last_conflicts = integer_zero_node;
1712 dependence_stats.num_siv_independent++;
1715 dependence_stats.num_siv_dependent++;
1719 /* When the step does not divide the difference, there are
1723 *overlaps_a = conflict_fn_no_dependence ();
1724 *overlaps_b = conflict_fn_no_dependence ();
1725 *last_conflicts = integer_zero_node;
1726 dependence_stats.num_siv_independent++;
1735 chrec_b = {10, +, -1}
1737 In this case, chrec_a will not overlap with chrec_b. */
1738 *overlaps_a = conflict_fn_no_dependence ();
1739 *overlaps_b = conflict_fn_no_dependence ();
1740 *last_conflicts = integer_zero_node;
1741 dependence_stats.num_siv_independent++;
1748 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1750 if (dump_file && (dump_flags & TDF_DETAILS))
1751 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1753 *overlaps_a = conflict_fn_not_known ();
1754 *overlaps_b = conflict_fn_not_known ();
1755 *last_conflicts = chrec_dont_know;
1756 dependence_stats.num_siv_unimplemented++;
1761 if (value2 == false)
1765 chrec_b = {10, +, -1}
1767 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1769 HOST_WIDE_INT numiter;
1770 struct loop *loop = get_chrec_loop (chrec_b);
1772 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1773 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1774 CHREC_RIGHT (chrec_b));
1775 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1776 *last_conflicts = integer_one_node;
1778 /* Perform weak-zero siv test to see if overlap is
1779 outside the loop bounds. */
1780 numiter = max_stmt_executions_int (loop, true);
1783 && compare_tree_int (tmp, numiter) > 0)
1785 free_conflict_function (*overlaps_a);
1786 free_conflict_function (*overlaps_b);
1787 *overlaps_a = conflict_fn_no_dependence ();
1788 *overlaps_b = conflict_fn_no_dependence ();
1789 *last_conflicts = integer_zero_node;
1790 dependence_stats.num_siv_independent++;
1793 dependence_stats.num_siv_dependent++;
1797 /* When the step does not divide the difference, there
1801 *overlaps_a = conflict_fn_no_dependence ();
1802 *overlaps_b = conflict_fn_no_dependence ();
1803 *last_conflicts = integer_zero_node;
1804 dependence_stats.num_siv_independent++;
1814 In this case, chrec_a will not overlap with chrec_b. */
1815 *overlaps_a = conflict_fn_no_dependence ();
1816 *overlaps_b = conflict_fn_no_dependence ();
1817 *last_conflicts = integer_zero_node;
1818 dependence_stats.num_siv_independent++;
1826 /* Helper recursive function for initializing the matrix A. Returns
1827 the initial value of CHREC. */
1830 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1834 switch (TREE_CODE (chrec))
1836 case POLYNOMIAL_CHREC:
1837 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1839 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1840 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1846 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1847 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1849 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1854 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1855 return chrec_convert (chrec_type (chrec), op, NULL);
1860 /* Handle ~X as -1 - X. */
1861 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1862 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1863 build_int_cst (TREE_TYPE (chrec), -1), op);
1875 #define FLOOR_DIV(x,y) ((x) / (y))
1877 /* Solves the special case of the Diophantine equation:
1878 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1880 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1881 number of iterations that loops X and Y run. The overlaps will be
1882 constructed as evolutions in dimension DIM. */
1885 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1886 affine_fn *overlaps_a,
1887 affine_fn *overlaps_b,
1888 tree *last_conflicts, int dim)
1890 if (((step_a > 0 && step_b > 0)
1891 || (step_a < 0 && step_b < 0)))
1893 int step_overlaps_a, step_overlaps_b;
1894 int gcd_steps_a_b, last_conflict, tau2;
1896 gcd_steps_a_b = gcd (step_a, step_b);
1897 step_overlaps_a = step_b / gcd_steps_a_b;
1898 step_overlaps_b = step_a / gcd_steps_a_b;
1902 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1903 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1904 last_conflict = tau2;
1905 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1908 *last_conflicts = chrec_dont_know;
1910 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1911 build_int_cst (NULL_TREE,
1913 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1914 build_int_cst (NULL_TREE,
1920 *overlaps_a = affine_fn_cst (integer_zero_node);
1921 *overlaps_b = affine_fn_cst (integer_zero_node);
1922 *last_conflicts = integer_zero_node;
1926 /* Solves the special case of a Diophantine equation where CHREC_A is
1927 an affine bivariate function, and CHREC_B is an affine univariate
1928 function. For example,
1930 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1932 has the following overlapping functions:
1934 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1935 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1936 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1938 FORNOW: This is a specialized implementation for a case occurring in
1939 a common benchmark. Implement the general algorithm. */
1942 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1943 conflict_function **overlaps_a,
1944 conflict_function **overlaps_b,
1945 tree *last_conflicts)
1947 bool xz_p, yz_p, xyz_p;
1948 int step_x, step_y, step_z;
1949 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1950 affine_fn overlaps_a_xz, overlaps_b_xz;
1951 affine_fn overlaps_a_yz, overlaps_b_yz;
1952 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1953 affine_fn ova1, ova2, ovb;
1954 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1956 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
1957 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
1958 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
1961 max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
1962 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
1963 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
1965 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
1967 if (dump_file && (dump_flags & TDF_DETAILS))
1968 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
1970 *overlaps_a = conflict_fn_not_known ();
1971 *overlaps_b = conflict_fn_not_known ();
1972 *last_conflicts = chrec_dont_know;
1976 niter = MIN (niter_x, niter_z);
1977 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
1980 &last_conflicts_xz, 1);
1981 niter = MIN (niter_y, niter_z);
1982 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
1985 &last_conflicts_yz, 2);
1986 niter = MIN (niter_x, niter_z);
1987 niter = MIN (niter_y, niter);
1988 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
1991 &last_conflicts_xyz, 3);
1993 xz_p = !integer_zerop (last_conflicts_xz);
1994 yz_p = !integer_zerop (last_conflicts_yz);
1995 xyz_p = !integer_zerop (last_conflicts_xyz);
1997 if (xz_p || yz_p || xyz_p)
1999 ova1 = affine_fn_cst (integer_zero_node);
2000 ova2 = affine_fn_cst (integer_zero_node);
2001 ovb = affine_fn_cst (integer_zero_node);
2004 affine_fn t0 = ova1;
2007 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2008 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2009 affine_fn_free (t0);
2010 affine_fn_free (t2);
2011 *last_conflicts = last_conflicts_xz;
2015 affine_fn t0 = ova2;
2018 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2019 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2020 affine_fn_free (t0);
2021 affine_fn_free (t2);
2022 *last_conflicts = last_conflicts_yz;
2026 affine_fn t0 = ova1;
2027 affine_fn t2 = ova2;
2030 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2031 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2032 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2033 affine_fn_free (t0);
2034 affine_fn_free (t2);
2035 affine_fn_free (t4);
2036 *last_conflicts = last_conflicts_xyz;
2038 *overlaps_a = conflict_fn (2, ova1, ova2);
2039 *overlaps_b = conflict_fn (1, ovb);
2043 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2044 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2045 *last_conflicts = integer_zero_node;
2048 affine_fn_free (overlaps_a_xz);
2049 affine_fn_free (overlaps_b_xz);
2050 affine_fn_free (overlaps_a_yz);
2051 affine_fn_free (overlaps_b_yz);
2052 affine_fn_free (overlaps_a_xyz);
2053 affine_fn_free (overlaps_b_xyz);
2056 /* Copy the elements of vector VEC1 with length SIZE to VEC2. */
2059 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2062 memcpy (vec2, vec1, size * sizeof (*vec1));
2065 /* Copy the elements of M x N matrix MAT1 to MAT2. */
2068 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2073 for (i = 0; i < m; i++)
2074 lambda_vector_copy (mat1[i], mat2[i], n);
2077 /* Store the N x N identity matrix in MAT. */
2080 lambda_matrix_id (lambda_matrix mat, int size)
2084 for (i = 0; i < size; i++)
2085 for (j = 0; j < size; j++)
2086 mat[i][j] = (i == j) ? 1 : 0;
2089 /* Return the first nonzero element of vector VEC1 between START and N.
2090 We must have START <= N. Returns N if VEC1 is the zero vector. */
2093 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2096 while (j < n && vec1[j] == 0)
2101 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2102 R2 = R2 + CONST1 * R1. */
2105 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2112 for (i = 0; i < n; i++)
2113 mat[r2][i] += const1 * mat[r1][i];
2116 /* Swap rows R1 and R2 in matrix MAT. */
2119 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2128 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2129 and store the result in VEC2. */
2132 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2133 int size, int const1)
2138 lambda_vector_clear (vec2, size);
2140 for (i = 0; i < size; i++)
2141 vec2[i] = const1 * vec1[i];
2144 /* Negate vector VEC1 with length SIZE and store it in VEC2. */
2147 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2150 lambda_vector_mult_const (vec1, vec2, size, -1);
2153 /* Negate row R1 of matrix MAT which has N columns. */
2156 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2158 lambda_vector_negate (mat[r1], mat[r1], n);
2161 /* Return true if two vectors are equal. */
2164 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2167 for (i = 0; i < size; i++)
2168 if (vec1[i] != vec2[i])
2173 /* Given an M x N integer matrix A, this function determines an M x
2174 M unimodular matrix U, and an M x N echelon matrix S such that
2175 "U.A = S". This decomposition is also known as "right Hermite".
2177 Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2178 Restructuring Compilers" Utpal Banerjee. */
2181 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2182 lambda_matrix S, lambda_matrix U)
2186 lambda_matrix_copy (A, S, m, n);
2187 lambda_matrix_id (U, m);
2189 for (j = 0; j < n; j++)
2191 if (lambda_vector_first_nz (S[j], m, i0) < m)
2194 for (i = m - 1; i >= i0; i--)
2196 while (S[i][j] != 0)
2198 int sigma, factor, a, b;
2202 sigma = (a * b < 0) ? -1: 1;
2205 factor = sigma * (a / b);
2207 lambda_matrix_row_add (S, n, i, i-1, -factor);
2208 lambda_matrix_row_exchange (S, i, i-1);
2210 lambda_matrix_row_add (U, m, i, i-1, -factor);
2211 lambda_matrix_row_exchange (U, i, i-1);
2218 /* Determines the overlapping elements due to accesses CHREC_A and
2219 CHREC_B, that are affine functions. This function cannot handle
2220 symbolic evolution functions, ie. when initial conditions are
2221 parameters, because it uses lambda matrices of integers. */
2224 analyze_subscript_affine_affine (tree chrec_a,
2226 conflict_function **overlaps_a,
2227 conflict_function **overlaps_b,
2228 tree *last_conflicts)
2230 unsigned nb_vars_a, nb_vars_b, dim;
2231 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2232 lambda_matrix A, U, S;
2233 struct obstack scratch_obstack;
2235 if (eq_evolutions_p (chrec_a, chrec_b))
2237 /* The accessed index overlaps for each iteration in the
2239 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2240 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2241 *last_conflicts = chrec_dont_know;
2244 if (dump_file && (dump_flags & TDF_DETAILS))
2245 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2247 /* For determining the initial intersection, we have to solve a
2248 Diophantine equation. This is the most time consuming part.
2250 For answering to the question: "Is there a dependence?" we have
2251 to prove that there exists a solution to the Diophantine
2252 equation, and that the solution is in the iteration domain,
2253 i.e. the solution is positive or zero, and that the solution
2254 happens before the upper bound loop.nb_iterations. Otherwise
2255 there is no dependence. This function outputs a description of
2256 the iterations that hold the intersections. */
2258 nb_vars_a = nb_vars_in_chrec (chrec_a);
2259 nb_vars_b = nb_vars_in_chrec (chrec_b);
2261 gcc_obstack_init (&scratch_obstack);
2263 dim = nb_vars_a + nb_vars_b;
2264 U = lambda_matrix_new (dim, dim, &scratch_obstack);
2265 A = lambda_matrix_new (dim, 1, &scratch_obstack);
2266 S = lambda_matrix_new (dim, 1, &scratch_obstack);
2268 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2269 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2270 gamma = init_b - init_a;
2272 /* Don't do all the hard work of solving the Diophantine equation
2273 when we already know the solution: for example,
2276 | gamma = 3 - 3 = 0.
2277 Then the first overlap occurs during the first iterations:
2278 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2282 if (nb_vars_a == 1 && nb_vars_b == 1)
2284 HOST_WIDE_INT step_a, step_b;
2285 HOST_WIDE_INT niter, niter_a, niter_b;
2288 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2289 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2290 niter = MIN (niter_a, niter_b);
2291 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2292 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2294 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2297 *overlaps_a = conflict_fn (1, ova);
2298 *overlaps_b = conflict_fn (1, ovb);
2301 else if (nb_vars_a == 2 && nb_vars_b == 1)
2302 compute_overlap_steps_for_affine_1_2
2303 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2305 else if (nb_vars_a == 1 && nb_vars_b == 2)
2306 compute_overlap_steps_for_affine_1_2
2307 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2311 if (dump_file && (dump_flags & TDF_DETAILS))
2312 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2313 *overlaps_a = conflict_fn_not_known ();
2314 *overlaps_b = conflict_fn_not_known ();
2315 *last_conflicts = chrec_dont_know;
2317 goto end_analyze_subs_aa;
2321 lambda_matrix_right_hermite (A, dim, 1, S, U);
2326 lambda_matrix_row_negate (U, dim, 0);
2328 gcd_alpha_beta = S[0][0];
2330 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2331 but that is a quite strange case. Instead of ICEing, answer
2333 if (gcd_alpha_beta == 0)
2335 *overlaps_a = conflict_fn_not_known ();
2336 *overlaps_b = conflict_fn_not_known ();
2337 *last_conflicts = chrec_dont_know;
2338 goto end_analyze_subs_aa;
2341 /* The classic "gcd-test". */
2342 if (!int_divides_p (gcd_alpha_beta, gamma))
2344 /* The "gcd-test" has determined that there is no integer
2345 solution, i.e. there is no dependence. */
2346 *overlaps_a = conflict_fn_no_dependence ();
2347 *overlaps_b = conflict_fn_no_dependence ();
2348 *last_conflicts = integer_zero_node;
2351 /* Both access functions are univariate. This includes SIV and MIV cases. */
2352 else if (nb_vars_a == 1 && nb_vars_b == 1)
2354 /* Both functions should have the same evolution sign. */
2355 if (((A[0][0] > 0 && -A[1][0] > 0)
2356 || (A[0][0] < 0 && -A[1][0] < 0)))
2358 /* The solutions are given by:
2360 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2363 For a given integer t. Using the following variables,
2365 | i0 = u11 * gamma / gcd_alpha_beta
2366 | j0 = u12 * gamma / gcd_alpha_beta
2373 | y0 = j0 + j1 * t. */
2374 HOST_WIDE_INT i0, j0, i1, j1;
2376 i0 = U[0][0] * gamma / gcd_alpha_beta;
2377 j0 = U[0][1] * gamma / gcd_alpha_beta;
2381 if ((i1 == 0 && i0 < 0)
2382 || (j1 == 0 && j0 < 0))
2384 /* There is no solution.
2385 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2386 falls in here, but for the moment we don't look at the
2387 upper bound of the iteration domain. */
2388 *overlaps_a = conflict_fn_no_dependence ();
2389 *overlaps_b = conflict_fn_no_dependence ();
2390 *last_conflicts = integer_zero_node;
2391 goto end_analyze_subs_aa;
2394 if (i1 > 0 && j1 > 0)
2396 HOST_WIDE_INT niter_a = max_stmt_executions_int
2397 (get_chrec_loop (chrec_a), true);
2398 HOST_WIDE_INT niter_b = max_stmt_executions_int
2399 (get_chrec_loop (chrec_b), true);
2400 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2402 /* (X0, Y0) is a solution of the Diophantine equation:
2403 "chrec_a (X0) = chrec_b (Y0)". */
2404 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2406 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2407 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2409 /* (X1, Y1) is the smallest positive solution of the eq
2410 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2411 first conflict occurs. */
2412 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2413 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2414 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2418 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2419 FLOOR_DIV (niter - j0, j1));
2420 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2422 /* If the overlap occurs outside of the bounds of the
2423 loop, there is no dependence. */
2424 if (x1 >= niter || y1 >= niter)
2426 *overlaps_a = conflict_fn_no_dependence ();
2427 *overlaps_b = conflict_fn_no_dependence ();
2428 *last_conflicts = integer_zero_node;
2429 goto end_analyze_subs_aa;
2432 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2435 *last_conflicts = chrec_dont_know;
2439 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2441 build_int_cst (NULL_TREE, i1)));
2444 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2446 build_int_cst (NULL_TREE, j1)));
2450 /* FIXME: For the moment, the upper bound of the
2451 iteration domain for i and j is not checked. */
2452 if (dump_file && (dump_flags & TDF_DETAILS))
2453 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2454 *overlaps_a = conflict_fn_not_known ();
2455 *overlaps_b = conflict_fn_not_known ();
2456 *last_conflicts = chrec_dont_know;
2461 if (dump_file && (dump_flags & TDF_DETAILS))
2462 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2463 *overlaps_a = conflict_fn_not_known ();
2464 *overlaps_b = conflict_fn_not_known ();
2465 *last_conflicts = chrec_dont_know;
2470 if (dump_file && (dump_flags & TDF_DETAILS))
2471 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2472 *overlaps_a = conflict_fn_not_known ();
2473 *overlaps_b = conflict_fn_not_known ();
2474 *last_conflicts = chrec_dont_know;
2477 end_analyze_subs_aa:
2478 obstack_free (&scratch_obstack, NULL);
2479 if (dump_file && (dump_flags & TDF_DETAILS))
2481 fprintf (dump_file, " (overlaps_a = ");
2482 dump_conflict_function (dump_file, *overlaps_a);
2483 fprintf (dump_file, ")\n (overlaps_b = ");
2484 dump_conflict_function (dump_file, *overlaps_b);
2485 fprintf (dump_file, ")\n");
2486 fprintf (dump_file, ")\n");
2490 /* Returns true when analyze_subscript_affine_affine can be used for
2491 determining the dependence relation between chrec_a and chrec_b,
2492 that contain symbols. This function modifies chrec_a and chrec_b
2493 such that the analysis result is the same, and such that they don't
2494 contain symbols, and then can safely be passed to the analyzer.
2496 Example: The analysis of the following tuples of evolutions produce
2497 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2500 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2501 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2505 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2507 tree diff, type, left_a, left_b, right_b;
2509 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2510 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2511 /* FIXME: For the moment not handled. Might be refined later. */
2514 type = chrec_type (*chrec_a);
2515 left_a = CHREC_LEFT (*chrec_a);
2516 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2517 diff = chrec_fold_minus (type, left_a, left_b);
2519 if (!evolution_function_is_constant_p (diff))
2522 if (dump_file && (dump_flags & TDF_DETAILS))
2523 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2525 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2526 diff, CHREC_RIGHT (*chrec_a));
2527 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2528 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2529 build_int_cst (type, 0),
2534 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2535 *OVERLAPS_B are initialized to the functions that describe the
2536 relation between the elements accessed twice by CHREC_A and
2537 CHREC_B. For k >= 0, the following property is verified:
2539 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2542 analyze_siv_subscript (tree chrec_a,
2544 conflict_function **overlaps_a,
2545 conflict_function **overlaps_b,
2546 tree *last_conflicts,
2549 dependence_stats.num_siv++;
2551 if (dump_file && (dump_flags & TDF_DETAILS))
2552 fprintf (dump_file, "(analyze_siv_subscript \n");
2554 if (evolution_function_is_constant_p (chrec_a)
2555 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2556 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2557 overlaps_a, overlaps_b, last_conflicts);
2559 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2560 && evolution_function_is_constant_p (chrec_b))
2561 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2562 overlaps_b, overlaps_a, last_conflicts);
2564 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2565 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2567 if (!chrec_contains_symbols (chrec_a)
2568 && !chrec_contains_symbols (chrec_b))
2570 analyze_subscript_affine_affine (chrec_a, chrec_b,
2571 overlaps_a, overlaps_b,
2574 if (CF_NOT_KNOWN_P (*overlaps_a)
2575 || CF_NOT_KNOWN_P (*overlaps_b))
2576 dependence_stats.num_siv_unimplemented++;
2577 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2578 || CF_NO_DEPENDENCE_P (*overlaps_b))
2579 dependence_stats.num_siv_independent++;
2581 dependence_stats.num_siv_dependent++;
2583 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2586 analyze_subscript_affine_affine (chrec_a, chrec_b,
2587 overlaps_a, overlaps_b,
2590 if (CF_NOT_KNOWN_P (*overlaps_a)
2591 || CF_NOT_KNOWN_P (*overlaps_b))
2592 dependence_stats.num_siv_unimplemented++;
2593 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2594 || CF_NO_DEPENDENCE_P (*overlaps_b))
2595 dependence_stats.num_siv_independent++;
2597 dependence_stats.num_siv_dependent++;
2600 goto siv_subscript_dontknow;
2605 siv_subscript_dontknow:;
2606 if (dump_file && (dump_flags & TDF_DETAILS))
2607 fprintf (dump_file, "siv test failed: unimplemented.\n");
2608 *overlaps_a = conflict_fn_not_known ();
2609 *overlaps_b = conflict_fn_not_known ();
2610 *last_conflicts = chrec_dont_know;
2611 dependence_stats.num_siv_unimplemented++;
2614 if (dump_file && (dump_flags & TDF_DETAILS))
2615 fprintf (dump_file, ")\n");
2618 /* Returns false if we can prove that the greatest common divisor of the steps
2619 of CHREC does not divide CST, false otherwise. */
2622 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2624 HOST_WIDE_INT cd = 0, val;
2627 if (!host_integerp (cst, 0))
2629 val = tree_low_cst (cst, 0);
2631 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2633 step = CHREC_RIGHT (chrec);
2634 if (!host_integerp (step, 0))
2636 cd = gcd (cd, tree_low_cst (step, 0));
2637 chrec = CHREC_LEFT (chrec);
2640 return val % cd == 0;
2643 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2644 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2645 functions that describe the relation between the elements accessed
2646 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2649 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2652 analyze_miv_subscript (tree chrec_a,
2654 conflict_function **overlaps_a,
2655 conflict_function **overlaps_b,
2656 tree *last_conflicts,
2657 struct loop *loop_nest)
2659 tree type, difference;
2661 dependence_stats.num_miv++;
2662 if (dump_file && (dump_flags & TDF_DETAILS))
2663 fprintf (dump_file, "(analyze_miv_subscript \n");
2665 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2666 chrec_a = chrec_convert (type, chrec_a, NULL);
2667 chrec_b = chrec_convert (type, chrec_b, NULL);
2668 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2670 if (eq_evolutions_p (chrec_a, chrec_b))
2672 /* Access functions are the same: all the elements are accessed
2673 in the same order. */
2674 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2675 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2676 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2677 dependence_stats.num_miv_dependent++;
2680 else if (evolution_function_is_constant_p (difference)
2681 /* For the moment, the following is verified:
2682 evolution_function_is_affine_multivariate_p (chrec_a,
2684 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2686 /* testsuite/.../ssa-chrec-33.c
2687 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2689 The difference is 1, and all the evolution steps are multiples
2690 of 2, consequently there are no overlapping elements. */
2691 *overlaps_a = conflict_fn_no_dependence ();
2692 *overlaps_b = conflict_fn_no_dependence ();
2693 *last_conflicts = integer_zero_node;
2694 dependence_stats.num_miv_independent++;
2697 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2698 && !chrec_contains_symbols (chrec_a)
2699 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2700 && !chrec_contains_symbols (chrec_b))
2702 /* testsuite/.../ssa-chrec-35.c
2703 {0, +, 1}_2 vs. {0, +, 1}_3
2704 the overlapping elements are respectively located at iterations:
2705 {0, +, 1}_x and {0, +, 1}_x,
2706 in other words, we have the equality:
2707 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2710 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2711 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2713 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2714 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2716 analyze_subscript_affine_affine (chrec_a, chrec_b,
2717 overlaps_a, overlaps_b, last_conflicts);
2719 if (CF_NOT_KNOWN_P (*overlaps_a)
2720 || CF_NOT_KNOWN_P (*overlaps_b))
2721 dependence_stats.num_miv_unimplemented++;
2722 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2723 || CF_NO_DEPENDENCE_P (*overlaps_b))
2724 dependence_stats.num_miv_independent++;
2726 dependence_stats.num_miv_dependent++;
2731 /* When the analysis is too difficult, answer "don't know". */
2732 if (dump_file && (dump_flags & TDF_DETAILS))
2733 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2735 *overlaps_a = conflict_fn_not_known ();
2736 *overlaps_b = conflict_fn_not_known ();
2737 *last_conflicts = chrec_dont_know;
2738 dependence_stats.num_miv_unimplemented++;
2741 if (dump_file && (dump_flags & TDF_DETAILS))
2742 fprintf (dump_file, ")\n");
2745 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2746 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2747 OVERLAP_ITERATIONS_B are initialized with two functions that
2748 describe the iterations that contain conflicting elements.
2750 Remark: For an integer k >= 0, the following equality is true:
2752 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2756 analyze_overlapping_iterations (tree chrec_a,
2758 conflict_function **overlap_iterations_a,
2759 conflict_function **overlap_iterations_b,
2760 tree *last_conflicts, struct loop *loop_nest)
2762 unsigned int lnn = loop_nest->num;
2764 dependence_stats.num_subscript_tests++;
2766 if (dump_file && (dump_flags & TDF_DETAILS))
2768 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2769 fprintf (dump_file, " (chrec_a = ");
2770 print_generic_expr (dump_file, chrec_a, 0);
2771 fprintf (dump_file, ")\n (chrec_b = ");
2772 print_generic_expr (dump_file, chrec_b, 0);
2773 fprintf (dump_file, ")\n");
2776 if (chrec_a == NULL_TREE
2777 || chrec_b == NULL_TREE
2778 || chrec_contains_undetermined (chrec_a)
2779 || chrec_contains_undetermined (chrec_b))
2781 dependence_stats.num_subscript_undetermined++;
2783 *overlap_iterations_a = conflict_fn_not_known ();
2784 *overlap_iterations_b = conflict_fn_not_known ();
2787 /* If they are the same chrec, and are affine, they overlap
2788 on every iteration. */
2789 else if (eq_evolutions_p (chrec_a, chrec_b)
2790 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2791 || operand_equal_p (chrec_a, chrec_b, 0)))
2793 dependence_stats.num_same_subscript_function++;
2794 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2795 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2796 *last_conflicts = chrec_dont_know;
2799 /* If they aren't the same, and aren't affine, we can't do anything
2801 else if ((chrec_contains_symbols (chrec_a)
2802 || chrec_contains_symbols (chrec_b))
2803 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2804 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2806 dependence_stats.num_subscript_undetermined++;
2807 *overlap_iterations_a = conflict_fn_not_known ();
2808 *overlap_iterations_b = conflict_fn_not_known ();
2811 else if (ziv_subscript_p (chrec_a, chrec_b))
2812 analyze_ziv_subscript (chrec_a, chrec_b,
2813 overlap_iterations_a, overlap_iterations_b,
2816 else if (siv_subscript_p (chrec_a, chrec_b))
2817 analyze_siv_subscript (chrec_a, chrec_b,
2818 overlap_iterations_a, overlap_iterations_b,
2819 last_conflicts, lnn);
2822 analyze_miv_subscript (chrec_a, chrec_b,
2823 overlap_iterations_a, overlap_iterations_b,
2824 last_conflicts, loop_nest);
2826 if (dump_file && (dump_flags & TDF_DETAILS))
2828 fprintf (dump_file, " (overlap_iterations_a = ");
2829 dump_conflict_function (dump_file, *overlap_iterations_a);
2830 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2831 dump_conflict_function (dump_file, *overlap_iterations_b);
2832 fprintf (dump_file, ")\n");
2833 fprintf (dump_file, ")\n");
2837 /* Helper function for uniquely inserting distance vectors. */
2840 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2845 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2846 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2849 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2852 /* Helper function for uniquely inserting direction vectors. */
2855 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2860 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2861 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2864 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2867 /* Add a distance of 1 on all the loops outer than INDEX. If we
2868 haven't yet determined a distance for this outer loop, push a new
2869 distance vector composed of the previous distance, and a distance
2870 of 1 for this outer loop. Example:
2878 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2879 save (0, 1), then we have to save (1, 0). */
2882 add_outer_distances (struct data_dependence_relation *ddr,
2883 lambda_vector dist_v, int index)
2885 /* For each outer loop where init_v is not set, the accesses are
2886 in dependence of distance 1 in the loop. */
2887 while (--index >= 0)
2889 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2890 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2892 save_dist_v (ddr, save_v);
2896 /* Return false when fail to represent the data dependence as a
2897 distance vector. INIT_B is set to true when a component has been
2898 added to the distance vector DIST_V. INDEX_CARRY is then set to
2899 the index in DIST_V that carries the dependence. */
2902 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2903 struct data_reference *ddr_a,
2904 struct data_reference *ddr_b,
2905 lambda_vector dist_v, bool *init_b,
2909 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2911 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2913 tree access_fn_a, access_fn_b;
2914 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2916 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2918 non_affine_dependence_relation (ddr);
2922 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2923 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2925 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2926 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2929 int var_a = CHREC_VARIABLE (access_fn_a);
2930 int var_b = CHREC_VARIABLE (access_fn_b);
2933 || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2935 non_affine_dependence_relation (ddr);
2939 dist = int_cst_value (SUB_DISTANCE (subscript));
2940 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
2941 *index_carry = MIN (index, *index_carry);
2943 /* This is the subscript coupling test. If we have already
2944 recorded a distance for this loop (a distance coming from
2945 another subscript), it should be the same. For example,
2946 in the following code, there is no dependence:
2953 if (init_v[index] != 0 && dist_v[index] != dist)
2955 finalize_ddr_dependent (ddr, chrec_known);
2959 dist_v[index] = dist;
2963 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2965 /* This can be for example an affine vs. constant dependence
2966 (T[i] vs. T[3]) that is not an affine dependence and is
2967 not representable as a distance vector. */
2968 non_affine_dependence_relation (ddr);
2976 /* Return true when the DDR contains only constant access functions. */
2979 constant_access_functions (const struct data_dependence_relation *ddr)
2983 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2984 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2985 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2991 /* Helper function for the case where DDR_A and DDR_B are the same
2992 multivariate access function with a constant step. For an example
2996 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2999 tree c_1 = CHREC_LEFT (c_2);
3000 tree c_0 = CHREC_LEFT (c_1);
3001 lambda_vector dist_v;
3004 /* Polynomials with more than 2 variables are not handled yet. When
3005 the evolution steps are parameters, it is not possible to
3006 represent the dependence using classical distance vectors. */
3007 if (TREE_CODE (c_0) != INTEGER_CST
3008 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3009 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3011 DDR_AFFINE_P (ddr) = false;
3015 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3016 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3018 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
3019 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3020 v1 = int_cst_value (CHREC_RIGHT (c_1));
3021 v2 = int_cst_value (CHREC_RIGHT (c_2));
3034 save_dist_v (ddr, dist_v);
3036 add_outer_distances (ddr, dist_v, x_1);
3039 /* Helper function for the case where DDR_A and DDR_B are the same
3040 access functions. */
3043 add_other_self_distances (struct data_dependence_relation *ddr)
3045 lambda_vector dist_v;
3047 int index_carry = DDR_NB_LOOPS (ddr);
3049 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3051 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3053 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3055 if (!evolution_function_is_univariate_p (access_fun))
3057 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3059 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3063 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3065 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3066 add_multivariate_self_dist (ddr, access_fun);
3068 /* The evolution step is not constant: it varies in
3069 the outer loop, so this cannot be represented by a
3070 distance vector. For example in pr34635.c the
3071 evolution is {0, +, {0, +, 4}_1}_2. */
3072 DDR_AFFINE_P (ddr) = false;
3077 index_carry = MIN (index_carry,
3078 index_in_loop_nest (CHREC_VARIABLE (access_fun),
3079 DDR_LOOP_NEST (ddr)));
3083 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3084 add_outer_distances (ddr, dist_v, index_carry);
3088 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3090 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3092 dist_v[DDR_INNER_LOOP (ddr)] = 1;
3093 save_dist_v (ddr, dist_v);
3096 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
3097 is the case for example when access functions are the same and
3098 equal to a constant, as in:
3105 in which case the distance vectors are (0) and (1). */
3108 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3112 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3114 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3115 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3116 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3118 for (j = 0; j < ca->n; j++)
3119 if (affine_function_zero_p (ca->fns[j]))
3121 insert_innermost_unit_dist_vector (ddr);
3125 for (j = 0; j < cb->n; j++)
3126 if (affine_function_zero_p (cb->fns[j]))
3128 insert_innermost_unit_dist_vector (ddr);
3134 /* Compute the classic per loop distance vector. DDR is the data
3135 dependence relation to build a vector from. Return false when fail
3136 to represent the data dependence as a distance vector. */
3139 build_classic_dist_vector (struct data_dependence_relation *ddr,
3140 struct loop *loop_nest)
3142 bool init_b = false;
3143 int index_carry = DDR_NB_LOOPS (ddr);
3144 lambda_vector dist_v;
3146 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3149 if (same_access_functions (ddr))
3151 /* Save the 0 vector. */
3152 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3153 save_dist_v (ddr, dist_v);
3155 if (constant_access_functions (ddr))
3156 add_distance_for_zero_overlaps (ddr);
3158 if (DDR_NB_LOOPS (ddr) > 1)
3159 add_other_self_distances (ddr);
3164 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3165 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3166 dist_v, &init_b, &index_carry))
3169 /* Save the distance vector if we initialized one. */
3172 /* Verify a basic constraint: classic distance vectors should
3173 always be lexicographically positive.
3175 Data references are collected in the order of execution of
3176 the program, thus for the following loop
3178 | for (i = 1; i < 100; i++)
3179 | for (j = 1; j < 100; j++)
3181 | t = T[j+1][i-1]; // A
3182 | T[j][i] = t + 2; // B
3185 references are collected following the direction of the wind:
3186 A then B. The data dependence tests are performed also
3187 following this order, such that we're looking at the distance
3188 separating the elements accessed by A from the elements later
3189 accessed by B. But in this example, the distance returned by
3190 test_dep (A, B) is lexicographically negative (-1, 1), that
3191 means that the access A occurs later than B with respect to
3192 the outer loop, ie. we're actually looking upwind. In this
3193 case we solve test_dep (B, A) looking downwind to the
3194 lexicographically positive solution, that returns the
3195 distance vector (1, -1). */
3196 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3198 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3199 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3202 compute_subscript_distance (ddr);
3203 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3204 save_v, &init_b, &index_carry))
3206 save_dist_v (ddr, save_v);
3207 DDR_REVERSED_P (ddr) = true;
3209 /* In this case there is a dependence forward for all the
3212 | for (k = 1; k < 100; k++)
3213 | for (i = 1; i < 100; i++)
3214 | for (j = 1; j < 100; j++)
3216 | t = T[j+1][i-1]; // A
3217 | T[j][i] = t + 2; // B
3225 if (DDR_NB_LOOPS (ddr) > 1)
3227 add_outer_distances (ddr, save_v, index_carry);
3228 add_outer_distances (ddr, dist_v, index_carry);
3233 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3234 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3236 if (DDR_NB_LOOPS (ddr) > 1)
3238 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3240 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3241 DDR_A (ddr), loop_nest))
3243 compute_subscript_distance (ddr);
3244 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3245 opposite_v, &init_b,
3249 save_dist_v (ddr, save_v);
3250 add_outer_distances (ddr, dist_v, index_carry);
3251 add_outer_distances (ddr, opposite_v, index_carry);
3254 save_dist_v (ddr, save_v);
3259 /* There is a distance of 1 on all the outer loops: Example:
3260 there is a dependence of distance 1 on loop_1 for the array A.
3266 add_outer_distances (ddr, dist_v,
3267 lambda_vector_first_nz (dist_v,
3268 DDR_NB_LOOPS (ddr), 0));
3271 if (dump_file && (dump_flags & TDF_DETAILS))
3275 fprintf (dump_file, "(build_classic_dist_vector\n");
3276 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3278 fprintf (dump_file, " dist_vector = (");
3279 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3280 DDR_NB_LOOPS (ddr));
3281 fprintf (dump_file, " )\n");
3283 fprintf (dump_file, ")\n");
3289 /* Return the direction for a given distance.
3290 FIXME: Computing dir this way is suboptimal, since dir can catch
3291 cases that dist is unable to represent. */
3293 static inline enum data_dependence_direction
3294 dir_from_dist (int dist)
3297 return dir_positive;
3299 return dir_negative;
3304 /* Compute the classic per loop direction vector. DDR is the data
3305 dependence relation to build a vector from. */
3308 build_classic_dir_vector (struct data_dependence_relation *ddr)
3311 lambda_vector dist_v;
3313 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3315 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3317 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3318 dir_v[j] = dir_from_dist (dist_v[j]);
3320 save_dir_v (ddr, dir_v);
3324 /* Helper function. Returns true when there is a dependence between
3325 data references DRA and DRB. */
3328 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3329 struct data_reference *dra,
3330 struct data_reference *drb,
3331 struct loop *loop_nest)
3334 tree last_conflicts;
3335 struct subscript *subscript;
3337 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3340 conflict_function *overlaps_a, *overlaps_b;
3342 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3343 DR_ACCESS_FN (drb, i),
3344 &overlaps_a, &overlaps_b,
3345 &last_conflicts, loop_nest);
3347 if (CF_NOT_KNOWN_P (overlaps_a)
3348 || CF_NOT_KNOWN_P (overlaps_b))
3350 finalize_ddr_dependent (ddr, chrec_dont_know);
3351 dependence_stats.num_dependence_undetermined++;
3352 free_conflict_function (overlaps_a);
3353 free_conflict_function (overlaps_b);
3357 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3358 || CF_NO_DEPENDENCE_P (overlaps_b))
3360 finalize_ddr_dependent (ddr, chrec_known);
3361 dependence_stats.num_dependence_independent++;
3362 free_conflict_function (overlaps_a);
3363 free_conflict_function (overlaps_b);
3369 if (SUB_CONFLICTS_IN_A (subscript))
3370 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3371 if (SUB_CONFLICTS_IN_B (subscript))
3372 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3374 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3375 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3376 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3383 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3386 subscript_dependence_tester (struct data_dependence_relation *ddr,
3387 struct loop *loop_nest)
3390 if (dump_file && (dump_flags & TDF_DETAILS))
3391 fprintf (dump_file, "(subscript_dependence_tester \n");
3393 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3394 dependence_stats.num_dependence_dependent++;
3396 compute_subscript_distance (ddr);
3397 if (build_classic_dist_vector (ddr, loop_nest))
3398 build_classic_dir_vector (ddr);
3400 if (dump_file && (dump_flags & TDF_DETAILS))
3401 fprintf (dump_file, ")\n");
3404 /* Returns true when all the access functions of A are affine or
3405 constant with respect to LOOP_NEST. */
3408 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3409 const struct loop *loop_nest)
3412 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3415 FOR_EACH_VEC_ELT (tree, fns, i, t)
3416 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3417 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3423 /* Initializes an equation for an OMEGA problem using the information
3424 contained in the ACCESS_FUN. Returns true when the operation
3427 PB is the omega constraint system.
3428 EQ is the number of the equation to be initialized.
3429 OFFSET is used for shifting the variables names in the constraints:
3430 a constrain is composed of 2 * the number of variables surrounding
3431 dependence accesses. OFFSET is set either to 0 for the first n variables,
3432 then it is set to n.
3433 ACCESS_FUN is expected to be an affine chrec. */
3436 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3437 unsigned int offset, tree access_fun,
3438 struct data_dependence_relation *ddr)
3440 switch (TREE_CODE (access_fun))
3442 case POLYNOMIAL_CHREC:
3444 tree left = CHREC_LEFT (access_fun);
3445 tree right = CHREC_RIGHT (access_fun);
3446 int var = CHREC_VARIABLE (access_fun);
3449 if (TREE_CODE (right) != INTEGER_CST)
3452 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3453 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3455 /* Compute the innermost loop index. */
3456 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3459 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3460 += int_cst_value (right);
3462 switch (TREE_CODE (left))
3464 case POLYNOMIAL_CHREC:
3465 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3468 pb->eqs[eq].coef[0] += int_cst_value (left);
3477 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3485 /* As explained in the comments preceding init_omega_for_ddr, we have
3486 to set up a system for each loop level, setting outer loops
3487 variation to zero, and current loop variation to positive or zero.
3488 Save each lexico positive distance vector. */
3491 omega_extract_distance_vectors (omega_pb pb,
3492 struct data_dependence_relation *ddr)
3496 struct loop *loopi, *loopj;
3497 enum omega_result res;
3499 /* Set a new problem for each loop in the nest. The basis is the
3500 problem that we have initialized until now. On top of this we
3501 add new constraints. */
3502 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3503 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3506 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3507 DDR_NB_LOOPS (ddr));
3509 omega_copy_problem (copy, pb);
3511 /* For all the outer loops "loop_j", add "dj = 0". */
3513 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3515 eq = omega_add_zero_eq (copy, omega_black);
3516 copy->eqs[eq].coef[j + 1] = 1;
3519 /* For "loop_i", add "0 <= di". */
3520 geq = omega_add_zero_geq (copy, omega_black);
3521 copy->geqs[geq].coef[i + 1] = 1;
3523 /* Reduce the constraint system, and test that the current
3524 problem is feasible. */
3525 res = omega_simplify_problem (copy);
3526 if (res == omega_false
3527 || res == omega_unknown
3528 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3531 for (eq = 0; eq < copy->num_subs; eq++)
3532 if (copy->subs[eq].key == (int) i + 1)
3534 dist = copy->subs[eq].coef[0];
3540 /* Reinitialize problem... */
3541 omega_copy_problem (copy, pb);
3543 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3545 eq = omega_add_zero_eq (copy, omega_black);
3546 copy->eqs[eq].coef[j + 1] = 1;
3549 /* ..., but this time "di = 1". */
3550 eq = omega_add_zero_eq (copy, omega_black);
3551 copy->eqs[eq].coef[i + 1] = 1;
3552 copy->eqs[eq].coef[0] = -1;
3554 res = omega_simplify_problem (copy);
3555 if (res == omega_false
3556 || res == omega_unknown
3557 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3560 for (eq = 0; eq < copy->num_subs; eq++)
3561 if (copy->subs[eq].key == (int) i + 1)
3563 dist = copy->subs[eq].coef[0];
3569 /* Save the lexicographically positive distance vector. */
3572 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3573 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3577 for (eq = 0; eq < copy->num_subs; eq++)
3578 if (copy->subs[eq].key > 0)
3580 dist = copy->subs[eq].coef[0];
3581 dist_v[copy->subs[eq].key - 1] = dist;
3584 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3585 dir_v[j] = dir_from_dist (dist_v[j]);
3587 save_dist_v (ddr, dist_v);
3588 save_dir_v (ddr, dir_v);
3592 omega_free_problem (copy);
3596 /* This is called for each subscript of a tuple of data references:
3597 insert an equality for representing the conflicts. */
3600 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3601 struct data_dependence_relation *ddr,
3602 omega_pb pb, bool *maybe_dependent)
3605 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3606 TREE_TYPE (access_fun_b));
3607 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3608 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3609 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3612 /* When the fun_a - fun_b is not constant, the dependence is not
3613 captured by the classic distance vector representation. */
3614 if (TREE_CODE (difference) != INTEGER_CST)
3618 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3620 /* There is no dependence. */
3621 *maybe_dependent = false;
3625 minus_one = build_int_cst (type, -1);
3626 fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3628 eq = omega_add_zero_eq (pb, omega_black);
3629 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3630 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3631 /* There is probably a dependence, but the system of
3632 constraints cannot be built: answer "don't know". */
3636 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3637 && !int_divides_p (lambda_vector_gcd
3638 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3639 2 * DDR_NB_LOOPS (ddr)),
3640 pb->eqs[eq].coef[0]))
3642 /* There is no dependence. */
3643 *maybe_dependent = false;
3650 /* Helper function, same as init_omega_for_ddr but specialized for
3651 data references A and B. */
3654 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3655 struct data_dependence_relation *ddr,
3656 omega_pb pb, bool *maybe_dependent)
3661 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3663 /* Insert an equality per subscript. */
3664 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3666 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3667 ddr, pb, maybe_dependent))
3669 else if (*maybe_dependent == false)
3671 /* There is no dependence. */
3672 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3677 /* Insert inequalities: constraints corresponding to the iteration
3678 domain, i.e. the loops surrounding the references "loop_x" and
3679 the distance variables "dx". The layout of the OMEGA
3680 representation is as follows:
3681 - coef[0] is the constant
3682 - coef[1..nb_loops] are the protected variables that will not be
3683 removed by the solver: the "dx"
3684 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3686 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3687 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3689 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3692 ineq = omega_add_zero_geq (pb, omega_black);
3693 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3695 /* 0 <= loop_x + dx */
3696 ineq = omega_add_zero_geq (pb, omega_black);
3697 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3698 pb->geqs[ineq].coef[i + 1] = 1;
3702 /* loop_x <= nb_iters */
3703 ineq = omega_add_zero_geq (pb, omega_black);
3704 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3705 pb->geqs[ineq].coef[0] = nbi;
3707 /* loop_x + dx <= nb_iters */
3708 ineq = omega_add_zero_geq (pb, omega_black);
3709 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3710 pb->geqs[ineq].coef[i + 1] = -1;
3711 pb->geqs[ineq].coef[0] = nbi;
3713 /* A step "dx" bigger than nb_iters is not feasible, so
3714 add "0 <= nb_iters + dx", */
3715 ineq = omega_add_zero_geq (pb, omega_black);
3716 pb->geqs[ineq].coef[i + 1] = 1;
3717 pb->geqs[ineq].coef[0] = nbi;
3718 /* and "dx <= nb_iters". */
3719 ineq = omega_add_zero_geq (pb, omega_black);
3720 pb->geqs[ineq].coef[i + 1] = -1;
3721 pb->geqs[ineq].coef[0] = nbi;
3725 omega_extract_distance_vectors (pb, ddr);
3730 /* Sets up the Omega dependence problem for the data dependence
3731 relation DDR. Returns false when the constraint system cannot be
3732 built, ie. when the test answers "don't know". Returns true
3733 otherwise, and when independence has been proved (using one of the
3734 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3735 set MAYBE_DEPENDENT to true.
3737 Example: for setting up the dependence system corresponding to the
3738 conflicting accesses
3743 | ... A[2*j, 2*(i + j)]
3747 the following constraints come from the iteration domain:
3754 where di, dj are the distance variables. The constraints
3755 representing the conflicting elements are:
3758 i + 1 = 2 * (i + di + j + dj)
3760 For asking that the resulting distance vector (di, dj) be
3761 lexicographically positive, we insert the constraint "di >= 0". If
3762 "di = 0" in the solution, we fix that component to zero, and we
3763 look at the inner loops: we set a new problem where all the outer
3764 loop distances are zero, and fix this inner component to be
3765 positive. When one of the components is positive, we save that
3766 distance, and set a new problem where the distance on this loop is
3767 zero, searching for other distances in the inner loops. Here is
3768 the classic example that illustrates that we have to set for each
3769 inner loop a new problem:
3777 we have to save two distances (1, 0) and (0, 1).
3779 Given two array references, refA and refB, we have to set the
3780 dependence problem twice, refA vs. refB and refB vs. refA, and we
3781 cannot do a single test, as refB might occur before refA in the
3782 inner loops, and the contrary when considering outer loops: ex.
3787 | T[{1,+,1}_2][{1,+,1}_1] // refA
3788 | T[{2,+,1}_2][{0,+,1}_1] // refB
3793 refB touches the elements in T before refA, and thus for the same
3794 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3795 but for successive loop_0 iterations, we have (1, -1, 1)
3797 The Omega solver expects the distance variables ("di" in the
3798 previous example) to come first in the constraint system (as
3799 variables to be protected, or "safe" variables), the constraint
3800 system is built using the following layout:
3802 "cst | distance vars | index vars".
3806 init_omega_for_ddr (struct data_dependence_relation *ddr,
3807 bool *maybe_dependent)
3812 *maybe_dependent = true;
3814 if (same_access_functions (ddr))
3817 lambda_vector dir_v;
3819 /* Save the 0 vector. */
3820 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3821 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3822 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3823 dir_v[j] = dir_equal;
3824 save_dir_v (ddr, dir_v);
3826 /* Save the dependences carried by outer loops. */
3827 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3828 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3830 omega_free_problem (pb);
3834 /* Omega expects the protected variables (those that have to be kept
3835 after elimination) to appear first in the constraint system.
3836 These variables are the distance variables. In the following
3837 initialization we declare NB_LOOPS safe variables, and the total
3838 number of variables for the constraint system is 2*NB_LOOPS. */
3839 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3840 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3842 omega_free_problem (pb);
3844 /* Stop computation if not decidable, or no dependence. */
3845 if (res == false || *maybe_dependent == false)
3848 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3849 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3851 omega_free_problem (pb);
3856 /* Return true when DDR contains the same information as that stored
3857 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3860 ddr_consistent_p (FILE *file,
3861 struct data_dependence_relation *ddr,
3862 VEC (lambda_vector, heap) *dist_vects,
3863 VEC (lambda_vector, heap) *dir_vects)
3867 /* If dump_file is set, output there. */
3868 if (dump_file && (dump_flags & TDF_DETAILS))
3871 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3873 lambda_vector b_dist_v;
3874 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3875 VEC_length (lambda_vector, dist_vects),
3876 DDR_NUM_DIST_VECTS (ddr));
3878 fprintf (file, "Banerjee dist vectors:\n");
3879 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3880 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3882 fprintf (file, "Omega dist vectors:\n");
3883 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3884 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3886 fprintf (file, "data dependence relation:\n");
3887 dump_data_dependence_relation (file, ddr);
3889 fprintf (file, ")\n");
3893 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3895 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3896 VEC_length (lambda_vector, dir_vects),
3897 DDR_NUM_DIR_VECTS (ddr));
3901 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3903 lambda_vector a_dist_v;
3904 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3906 /* Distance vectors are not ordered in the same way in the DDR
3907 and in the DIST_VECTS: search for a matching vector. */
3908 FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3909 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3912 if (j == VEC_length (lambda_vector, dist_vects))
3914 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3915 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3916 fprintf (file, "not found in Omega dist vectors:\n");
3917 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3918 fprintf (file, "data dependence relation:\n");
3919 dump_data_dependence_relation (file, ddr);
3920 fprintf (file, ")\n");
3924 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3926 lambda_vector a_dir_v;
3927 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3929 /* Direction vectors are not ordered in the same way in the DDR
3930 and in the DIR_VECTS: search for a matching vector. */
3931 FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3932 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3935 if (j == VEC_length (lambda_vector, dist_vects))
3937 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3938 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3939 fprintf (file, "not found in Omega dir vectors:\n");
3940 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3941 fprintf (file, "data dependence relation:\n");
3942 dump_data_dependence_relation (file, ddr);
3943 fprintf (file, ")\n");
3950 /* This computes the affine dependence relation between A and B with
3951 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3952 independence between two accesses, while CHREC_DONT_KNOW is used
3953 for representing the unknown relation.
3955 Note that it is possible to stop the computation of the dependence
3956 relation the first time we detect a CHREC_KNOWN element for a given
3960 compute_affine_dependence (struct data_dependence_relation *ddr,
3961 struct loop *loop_nest)
3963 struct data_reference *dra = DDR_A (ddr);
3964 struct data_reference *drb = DDR_B (ddr);
3966 if (dump_file && (dump_flags & TDF_DETAILS))
3968 fprintf (dump_file, "(compute_affine_dependence\n");
3969 fprintf (dump_file, " (stmt_a = \n");
3970 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3971 fprintf (dump_file, ")\n (stmt_b = \n");
3972 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3973 fprintf (dump_file, ")\n");
3976 /* Analyze only when the dependence relation is not yet known. */
3977 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3978 && !DDR_SELF_REFERENCE (ddr))
3980 dependence_stats.num_dependence_tests++;
3982 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3983 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3985 if (flag_check_data_deps)
3987 /* Compute the dependences using the first algorithm. */
3988 subscript_dependence_tester (ddr, loop_nest);
3990 if (dump_file && (dump_flags & TDF_DETAILS))
3992 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3993 dump_data_dependence_relation (dump_file, ddr);
3996 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3998 bool maybe_dependent;
3999 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4001 /* Save the result of the first DD analyzer. */
4002 dist_vects = DDR_DIST_VECTS (ddr);
4003 dir_vects = DDR_DIR_VECTS (ddr);
4005 /* Reset the information. */
4006 DDR_DIST_VECTS (ddr) = NULL;
4007 DDR_DIR_VECTS (ddr) = NULL;
4009 /* Compute the same information using Omega. */
4010 if (!init_omega_for_ddr (ddr, &maybe_dependent))
4011 goto csys_dont_know;
4013 if (dump_file && (dump_flags & TDF_DETAILS))
4015 fprintf (dump_file, "Omega Analyzer\n");
4016 dump_data_dependence_relation (dump_file, ddr);
4019 /* Check that we get the same information. */
4020 if (maybe_dependent)
4021 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4026 subscript_dependence_tester (ddr, loop_nest);
4029 /* As a last case, if the dependence cannot be determined, or if
4030 the dependence is considered too difficult to determine, answer
4035 dependence_stats.num_dependence_undetermined++;
4037 if (dump_file && (dump_flags & TDF_DETAILS))
4039 fprintf (dump_file, "Data ref a:\n");
4040 dump_data_reference (dump_file, dra);
4041 fprintf (dump_file, "Data ref b:\n");
4042 dump_data_reference (dump_file, drb);
4043 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4045 finalize_ddr_dependent (ddr, chrec_dont_know);
4049 if (dump_file && (dump_flags & TDF_DETAILS))
4050 fprintf (dump_file, ")\n");
4053 /* This computes the dependence relation for the same data
4054 reference into DDR. */
4057 compute_self_dependence (struct data_dependence_relation *ddr)
4060 struct subscript *subscript;
4062 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4065 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4068 if (SUB_CONFLICTS_IN_A (subscript))
4069 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4070 if (SUB_CONFLICTS_IN_B (subscript))
4071 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4073 /* The accessed index overlaps for each iteration. */
4074 SUB_CONFLICTS_IN_A (subscript)
4075 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4076 SUB_CONFLICTS_IN_B (subscript)
4077 = conflict_fn (1, affine_fn_cst (integer_zero_node));
4078 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4081 /* The distance vector is the zero vector. */
4082 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4083 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4086 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4087 the data references in DATAREFS, in the LOOP_NEST. When
4088 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4092 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4093 VEC (ddr_p, heap) **dependence_relations,
4094 VEC (loop_p, heap) *loop_nest,
4095 bool compute_self_and_rr)
4097 struct data_dependence_relation *ddr;
4098 struct data_reference *a, *b;
4101 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4102 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4103 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4105 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4106 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4108 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4111 if (compute_self_and_rr)
4112 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4114 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4115 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4116 compute_self_dependence (ddr);
4120 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4121 true if STMT clobbers memory, false otherwise. */
4124 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4126 bool clobbers_memory = false;
4129 enum gimple_code stmt_code = gimple_code (stmt);
4133 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4134 Calls have side-effects, except those to const or pure
4136 if ((stmt_code == GIMPLE_CALL
4137 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4138 || (stmt_code == GIMPLE_ASM
4139 && gimple_asm_volatile_p (stmt)))
4140 clobbers_memory = true;
4142 if (!gimple_vuse (stmt))
4143 return clobbers_memory;
4145 if (stmt_code == GIMPLE_ASSIGN)
4148 op0 = gimple_assign_lhs_ptr (stmt);
4149 op1 = gimple_assign_rhs1_ptr (stmt);
4152 || (REFERENCE_CLASS_P (*op1)
4153 && (base = get_base_address (*op1))
4154 && TREE_CODE (base) != SSA_NAME))
4156 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4158 ref->is_read = true;
4161 else if (stmt_code == GIMPLE_CALL)
4165 op0 = gimple_call_lhs_ptr (stmt);
4166 n = gimple_call_num_args (stmt);
4167 for (i = 0; i < n; i++)
4169 op1 = gimple_call_arg_ptr (stmt, i);
4172 || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4174 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4176 ref->is_read = true;
4181 return clobbers_memory;
4185 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4187 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4189 ref->is_read = false;
4191 return clobbers_memory;
4194 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4195 reference, returns false, otherwise returns true. NEST is the outermost
4196 loop of the loop nest in which the references should be analyzed. */
4199 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4200 VEC (data_reference_p, heap) **datarefs)
4203 VEC (data_ref_loc, heap) *references;
4206 data_reference_p dr;
4208 if (get_references_in_stmt (stmt, &references))
4210 VEC_free (data_ref_loc, heap, references);
4214 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4216 dr = create_data_ref (nest, loop_containing_stmt (stmt),
4217 *ref->pos, stmt, ref->is_read);
4218 gcc_assert (dr != NULL);
4219 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4221 VEC_free (data_ref_loc, heap, references);
4225 /* Stores the data references in STMT to DATAREFS. If there is an
4226 unanalyzable reference, returns false, otherwise returns true.
4227 NEST is the outermost loop of the loop nest in which the references
4228 should be instantiated, LOOP is the loop in which the references
4229 should be analyzed. */
4232 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4233 VEC (data_reference_p, heap) **datarefs)
4236 VEC (data_ref_loc, heap) *references;
4239 data_reference_p dr;
4241 if (get_references_in_stmt (stmt, &references))
4243 VEC_free (data_ref_loc, heap, references);
4247 FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4249 dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4250 gcc_assert (dr != NULL);
4251 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4254 VEC_free (data_ref_loc, heap, references);
4258 /* Search the data references in LOOP, and record the information into
4259 DATAREFS. Returns chrec_dont_know when failing to analyze a
4260 difficult case, returns NULL_TREE otherwise. */
4263 find_data_references_in_bb (struct loop *loop, basic_block bb,
4264 VEC (data_reference_p, heap) **datarefs)
4266 gimple_stmt_iterator bsi;
4268 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4270 gimple stmt = gsi_stmt (bsi);
4272 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4274 struct data_reference *res;
4275 res = XCNEW (struct data_reference);
4276 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4278 return chrec_dont_know;
4285 /* Search the data references in LOOP, and record the information into
4286 DATAREFS. Returns chrec_dont_know when failing to analyze a
4287 difficult case, returns NULL_TREE otherwise.
4289 TODO: This function should be made smarter so that it can handle address
4290 arithmetic as if they were array accesses, etc. */
4293 find_data_references_in_loop (struct loop *loop,
4294 VEC (data_reference_p, heap) **datarefs)
4296 basic_block bb, *bbs;
4299 bbs = get_loop_body_in_dom_order (loop);
4301 for (i = 0; i < loop->num_nodes; i++)
4305 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4308 return chrec_dont_know;
4316 /* Recursive helper function. */
4319 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4321 /* Inner loops of the nest should not contain siblings. Example:
4322 when there are two consecutive loops,
4333 the dependence relation cannot be captured by the distance
4338 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4340 return find_loop_nest_1 (loop->inner, loop_nest);
4344 /* Return false when the LOOP is not well nested. Otherwise return
4345 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4346 contain the loops from the outermost to the innermost, as they will
4347 appear in the classic distance vector. */
4350 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4352 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4354 return find_loop_nest_1 (loop->inner, loop_nest);
4358 /* Returns true when the data dependences have been computed, false otherwise.
4359 Given a loop nest LOOP, the following vectors are returned:
4360 DATAREFS is initialized to all the array elements contained in this loop,
4361 DEPENDENCE_RELATIONS contains the relations between the data references.
4362 Compute read-read and self relations if
4363 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4366 compute_data_dependences_for_loop (struct loop *loop,
4367 bool compute_self_and_read_read_dependences,
4368 VEC (loop_p, heap) **loop_nest,
4369 VEC (data_reference_p, heap) **datarefs,
4370 VEC (ddr_p, heap) **dependence_relations)
4374 memset (&dependence_stats, 0, sizeof (dependence_stats));
4376 /* If the loop nest is not well formed, or one of the data references
4377 is not computable, give up without spending time to compute other
4380 || !find_loop_nest (loop, loop_nest)
4381 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4383 struct data_dependence_relation *ddr;
4385 /* Insert a single relation into dependence_relations:
4387 ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4388 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4392 compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4393 compute_self_and_read_read_dependences);
4395 if (dump_file && (dump_flags & TDF_STATS))
4397 fprintf (dump_file, "Dependence tester statistics:\n");
4399 fprintf (dump_file, "Number of dependence tests: %d\n",
4400 dependence_stats.num_dependence_tests);
4401 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4402 dependence_stats.num_dependence_dependent);
4403 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4404 dependence_stats.num_dependence_independent);
4405 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4406 dependence_stats.num_dependence_undetermined);
4408 fprintf (dump_file, "Number of subscript tests: %d\n",
4409 dependence_stats.num_subscript_tests);
4410 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4411 dependence_stats.num_subscript_undetermined);
4412 fprintf (dump_file, "Number of same subscript function: %d\n",
4413 dependence_stats.num_same_subscript_function);
4415 fprintf (dump_file, "Number of ziv tests: %d\n",
4416 dependence_stats.num_ziv);
4417 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4418 dependence_stats.num_ziv_dependent);
4419 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4420 dependence_stats.num_ziv_independent);
4421 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4422 dependence_stats.num_ziv_unimplemented);
4424 fprintf (dump_file, "Number of siv tests: %d\n",
4425 dependence_stats.num_siv);
4426 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4427 dependence_stats.num_siv_dependent);
4428 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4429 dependence_stats.num_siv_independent);
4430 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4431 dependence_stats.num_siv_unimplemented);
4433 fprintf (dump_file, "Number of miv tests: %d\n",
4434 dependence_stats.num_miv);
4435 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4436 dependence_stats.num_miv_dependent);
4437 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4438 dependence_stats.num_miv_independent);
4439 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4440 dependence_stats.num_miv_unimplemented);
4446 /* Returns true when the data dependences for the basic block BB have been
4447 computed, false otherwise.
4448 DATAREFS is initialized to all the array elements contained in this basic
4449 block, DEPENDENCE_RELATIONS contains the relations between the data
4450 references. Compute read-read and self relations if
4451 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4453 compute_data_dependences_for_bb (basic_block bb,
4454 bool compute_self_and_read_read_dependences,
4455 VEC (data_reference_p, heap) **datarefs,
4456 VEC (ddr_p, heap) **dependence_relations)
4458 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4461 compute_all_dependences (*datarefs, dependence_relations, NULL,
4462 compute_self_and_read_read_dependences);
4466 /* Entry point (for testing only). Analyze all the data references
4467 and the dependence relations in LOOP.
4469 The data references are computed first.
4471 A relation on these nodes is represented by a complete graph. Some
4472 of the relations could be of no interest, thus the relations can be
4475 In the following function we compute all the relations. This is
4476 just a first implementation that is here for:
4477 - for showing how to ask for the dependence relations,
4478 - for the debugging the whole dependence graph,
4479 - for the dejagnu testcases and maintenance.
4481 It is possible to ask only for a part of the graph, avoiding to
4482 compute the whole dependence graph. The computed dependences are
4483 stored in a knowledge base (KB) such that later queries don't
4484 recompute the same information. The implementation of this KB is
4485 transparent to the optimizer, and thus the KB can be changed with a
4486 more efficient implementation, or the KB could be disabled. */
4488 analyze_all_data_dependences (struct loop *loop)
4491 int nb_data_refs = 10;
4492 VEC (data_reference_p, heap) *datarefs =
4493 VEC_alloc (data_reference_p, heap, nb_data_refs);
4494 VEC (ddr_p, heap) *dependence_relations =
4495 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4496 VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4498 /* Compute DDs on the whole function. */
4499 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4500 &dependence_relations);
4504 dump_data_dependence_relations (dump_file, dependence_relations);
4505 fprintf (dump_file, "\n\n");
4507 if (dump_flags & TDF_DETAILS)
4508 dump_dist_dir_vectors (dump_file, dependence_relations);
4510 if (dump_flags & TDF_STATS)
4512 unsigned nb_top_relations = 0;
4513 unsigned nb_bot_relations = 0;
4514 unsigned nb_chrec_relations = 0;
4515 struct data_dependence_relation *ddr;
4517 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4519 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4522 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4526 nb_chrec_relations++;
4529 gather_stats_on_scev_database ();
4533 VEC_free (loop_p, heap, loop_nest);
4534 free_dependence_relations (dependence_relations);
4535 free_data_refs (datarefs);
4538 /* Computes all the data dependences and check that the results of
4539 several analyzers are the same. */
4542 tree_check_data_deps (void)
4545 struct loop *loop_nest;
4547 FOR_EACH_LOOP (li, loop_nest, 0)
4548 analyze_all_data_dependences (loop_nest);
4551 /* Free the memory used by a data dependence relation DDR. */
4554 free_dependence_relation (struct data_dependence_relation *ddr)
4559 if (DDR_SUBSCRIPTS (ddr))
4560 free_subscripts (DDR_SUBSCRIPTS (ddr));
4561 if (DDR_DIST_VECTS (ddr))
4562 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4563 if (DDR_DIR_VECTS (ddr))
4564 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4569 /* Free the memory used by the data dependence relations from
4570 DEPENDENCE_RELATIONS. */
4573 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4576 struct data_dependence_relation *ddr;
4578 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4580 free_dependence_relation (ddr);
4582 VEC_free (ddr_p, heap, dependence_relations);
4585 /* Free the memory used by the data references from DATAREFS. */
4588 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4591 struct data_reference *dr;
4593 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4595 VEC_free (data_reference_p, heap, datarefs);
4600 /* Dump vertex I in RDG to FILE. */
4603 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4605 struct vertex *v = &(rdg->vertices[i]);
4606 struct graph_edge *e;
4608 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4609 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4610 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4613 for (e = v->pred; e; e = e->pred_next)
4614 fprintf (file, " %d", e->src);
4616 fprintf (file, ") (out:");
4619 for (e = v->succ; e; e = e->succ_next)
4620 fprintf (file, " %d", e->dest);
4622 fprintf (file, ")\n");
4623 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4624 fprintf (file, ")\n");
4627 /* Call dump_rdg_vertex on stderr. */
4630 debug_rdg_vertex (struct graph *rdg, int i)
4632 dump_rdg_vertex (stderr, rdg, i);
4635 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4636 dumped vertices to that bitmap. */
4638 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4642 fprintf (file, "(%d\n", c);
4644 for (i = 0; i < rdg->n_vertices; i++)
4645 if (rdg->vertices[i].component == c)
4648 bitmap_set_bit (dumped, i);
4650 dump_rdg_vertex (file, rdg, i);
4653 fprintf (file, ")\n");
4656 /* Call dump_rdg_vertex on stderr. */
4659 debug_rdg_component (struct graph *rdg, int c)
4661 dump_rdg_component (stderr, rdg, c, NULL);
4664 /* Dump the reduced dependence graph RDG to FILE. */
4667 dump_rdg (FILE *file, struct graph *rdg)
4670 bitmap dumped = BITMAP_ALLOC (NULL);
4672 fprintf (file, "(rdg\n");
4674 for (i = 0; i < rdg->n_vertices; i++)
4675 if (!bitmap_bit_p (dumped, i))
4676 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4678 fprintf (file, ")\n");
4679 BITMAP_FREE (dumped);
4682 /* Call dump_rdg on stderr. */
4685 debug_rdg (struct graph *rdg)
4687 dump_rdg (stderr, rdg);
4691 dot_rdg_1 (FILE *file, struct graph *rdg)
4695 fprintf (file, "digraph RDG {\n");
4697 for (i = 0; i < rdg->n_vertices; i++)
4699 struct vertex *v = &(rdg->vertices[i]);
4700 struct graph_edge *e;
4702 /* Highlight reads from memory. */
4703 if (RDG_MEM_READS_STMT (rdg, i))
4704 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4706 /* Highlight stores to memory. */
4707 if (RDG_MEM_WRITE_STMT (rdg, i))
4708 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4711 for (e = v->succ; e; e = e->succ_next)
4712 switch (RDGE_TYPE (e))
4715 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4719 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4723 /* These are the most common dependences: don't print these. */
4724 fprintf (file, "%d -> %d \n", i, e->dest);
4728 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4736 fprintf (file, "}\n\n");
4739 /* Display the Reduced Dependence Graph using dotty. */
4740 extern void dot_rdg (struct graph *);
4743 dot_rdg (struct graph *rdg)
4745 /* When debugging, enable the following code. This cannot be used
4746 in production compilers because it calls "system". */
4748 FILE *file = fopen ("/tmp/rdg.dot", "w");
4749 gcc_assert (file != NULL);
4751 dot_rdg_1 (file, rdg);
4754 system ("dotty /tmp/rdg.dot &");
4756 dot_rdg_1 (stderr, rdg);
4760 /* This structure is used for recording the mapping statement index in
4763 struct GTY(()) rdg_vertex_info
4769 /* Returns the index of STMT in RDG. */
4772 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4774 struct rdg_vertex_info rvi, *slot;
4777 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4785 /* Creates an edge in RDG for each distance vector from DDR. The
4786 order that we keep track of in the RDG is the order in which
4787 statements have to be executed. */
4790 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4792 struct graph_edge *e;
4794 data_reference_p dra = DDR_A (ddr);
4795 data_reference_p drb = DDR_B (ddr);
4796 unsigned level = ddr_dependence_level (ddr);
4798 /* For non scalar dependences, when the dependence is REVERSED,
4799 statement B has to be executed before statement A. */
4801 && !DDR_REVERSED_P (ddr))
4803 data_reference_p tmp = dra;
4808 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4809 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4811 if (va < 0 || vb < 0)
4814 e = add_edge (rdg, va, vb);
4815 e->data = XNEW (struct rdg_edge);
4817 RDGE_LEVEL (e) = level;
4818 RDGE_RELATION (e) = ddr;
4820 /* Determines the type of the data dependence. */
4821 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4822 RDGE_TYPE (e) = input_dd;
4823 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4824 RDGE_TYPE (e) = output_dd;
4825 else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4826 RDGE_TYPE (e) = flow_dd;
4827 else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4828 RDGE_TYPE (e) = anti_dd;
4831 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4832 the index of DEF in RDG. */
4835 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4837 use_operand_p imm_use_p;
4838 imm_use_iterator iterator;
4840 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4842 struct graph_edge *e;
4843 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4848 e = add_edge (rdg, idef, use);
4849 e->data = XNEW (struct rdg_edge);
4850 RDGE_TYPE (e) = flow_dd;
4851 RDGE_RELATION (e) = NULL;
4855 /* Creates the edges of the reduced dependence graph RDG. */
4858 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4861 struct data_dependence_relation *ddr;
4862 def_operand_p def_p;
4865 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4866 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4867 create_rdg_edge_for_ddr (rdg, ddr);
4869 for (i = 0; i < rdg->n_vertices; i++)
4870 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4872 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4875 /* Build the vertices of the reduced dependence graph RDG. */
4878 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4883 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4885 VEC (data_ref_loc, heap) *references;
4887 struct vertex *v = &(rdg->vertices[i]);
4888 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4889 struct rdg_vertex_info **slot;
4893 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4900 v->data = XNEW (struct rdg_vertex);
4901 RDG_STMT (rdg, i) = stmt;
4903 RDG_MEM_WRITE_STMT (rdg, i) = false;
4904 RDG_MEM_READS_STMT (rdg, i) = false;
4905 if (gimple_code (stmt) == GIMPLE_PHI)
4908 get_references_in_stmt (stmt, &references);
4909 FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4911 RDG_MEM_WRITE_STMT (rdg, i) = true;
4913 RDG_MEM_READS_STMT (rdg, i) = true;
4915 VEC_free (data_ref_loc, heap, references);
4919 /* Initialize STMTS with all the statements of LOOP. When
4920 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4921 which we discover statements is important as
4922 generate_loops_for_partition is using the same traversal for
4923 identifying statements. */
4926 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4929 basic_block *bbs = get_loop_body_in_dom_order (loop);
4931 for (i = 0; i < loop->num_nodes; i++)
4933 basic_block bb = bbs[i];
4934 gimple_stmt_iterator bsi;
4937 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4938 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4940 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4942 stmt = gsi_stmt (bsi);
4943 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
4944 VEC_safe_push (gimple, heap, *stmts, stmt);
4951 /* Returns true when all the dependences are computable. */
4954 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4959 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4960 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4966 /* Computes a hash function for element ELT. */
4969 hash_stmt_vertex_info (const void *elt)
4971 const struct rdg_vertex_info *const rvi =
4972 (const struct rdg_vertex_info *) elt;
4973 gimple stmt = rvi->stmt;
4975 return htab_hash_pointer (stmt);
4978 /* Compares database elements E1 and E2. */
4981 eq_stmt_vertex_info (const void *e1, const void *e2)
4983 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4984 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4986 return elt1->stmt == elt2->stmt;
4989 /* Free the element E. */
4992 hash_stmt_vertex_del (void *e)
4997 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4998 statement of the loop nest, and one edge per data dependence or
4999 scalar dependence. */
5002 build_empty_rdg (int n_stmts)
5004 int nb_data_refs = 10;
5005 struct graph *rdg = new_graph (n_stmts);
5007 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5008 eq_stmt_vertex_info, hash_stmt_vertex_del);
5012 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5013 statement of the loop nest, and one edge per data dependence or
5014 scalar dependence. */
5017 build_rdg (struct loop *loop,
5018 VEC (loop_p, heap) **loop_nest,
5019 VEC (ddr_p, heap) **dependence_relations,
5020 VEC (data_reference_p, heap) **datarefs)
5022 struct graph *rdg = NULL;
5023 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5025 compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5026 dependence_relations);
5028 if (known_dependences_p (*dependence_relations))
5030 stmts_from_loop (loop, &stmts);
5031 rdg = build_empty_rdg (VEC_length (gimple, stmts));
5032 create_rdg_vertices (rdg, stmts);
5033 create_rdg_edges (rdg, *dependence_relations);
5036 VEC_free (gimple, heap, stmts);
5040 /* Free the reduced dependence graph RDG. */
5043 free_rdg (struct graph *rdg)
5047 for (i = 0; i < rdg->n_vertices; i++)
5049 struct vertex *v = &(rdg->vertices[i]);
5050 struct graph_edge *e;
5052 for (e = v->succ; e; e = e->succ_next)
5058 htab_delete (rdg->indices);
5062 /* Initialize STMTS with all the statements of LOOP that contain a
5066 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5069 basic_block *bbs = get_loop_body_in_dom_order (loop);
5071 for (i = 0; i < loop->num_nodes; i++)
5073 basic_block bb = bbs[i];
5074 gimple_stmt_iterator bsi;
5076 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5077 if (gimple_vdef (gsi_stmt (bsi)))
5078 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5084 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5085 that contains a data reference on its LHS with a stride of the same
5086 size as its unit type. */
5089 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5093 struct data_reference *dr;
5096 || !gimple_vdef (stmt)
5097 || !is_gimple_assign (stmt)
5098 || !gimple_assign_single_p (stmt)
5099 || !(op1 = gimple_assign_rhs1 (stmt))
5100 || !(integer_zerop (op1) || real_zerop (op1)))
5103 dr = XCNEW (struct data_reference);
5104 op0 = gimple_assign_lhs (stmt);
5106 DR_STMT (dr) = stmt;
5109 res = dr_analyze_innermost (dr)
5110 && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5116 /* Initialize STMTS with all the statements of LOOP that contain a
5117 store to memory of the form "A[i] = 0". */
5120 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5124 gimple_stmt_iterator si;
5126 basic_block *bbs = get_loop_body_in_dom_order (loop);
5128 for (i = 0; i < loop->num_nodes; i++)
5129 for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5130 if ((stmt = gsi_stmt (si))
5131 && stmt_with_adjacent_zero_store_dr_p (stmt))
5132 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5137 /* For a data reference REF, return the declaration of its base
5138 address or NULL_TREE if the base is not determined. */
5141 ref_base_address (gimple stmt, data_ref_loc *ref)
5143 tree base = NULL_TREE;
5145 struct data_reference *dr = XCNEW (struct data_reference);
5147 DR_STMT (dr) = stmt;
5148 DR_REF (dr) = *ref->pos;
5149 dr_analyze_innermost (dr);
5150 base_address = DR_BASE_ADDRESS (dr);
5155 switch (TREE_CODE (base_address))
5158 base = TREE_OPERAND (base_address, 0);
5162 base = base_address;
5171 /* Determines whether the statement from vertex V of the RDG has a
5172 definition used outside the loop that contains this statement. */
5175 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5177 gimple stmt = RDG_STMT (rdg, v);
5178 struct loop *loop = loop_containing_stmt (stmt);
5179 use_operand_p imm_use_p;
5180 imm_use_iterator iterator;
5182 def_operand_p def_p;
5187 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5189 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5191 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5199 /* Determines whether statements S1 and S2 access to similar memory
5200 locations. Two memory accesses are considered similar when they
5201 have the same base address declaration, i.e. when their
5202 ref_base_address is the same. */
5205 have_similar_memory_accesses (gimple s1, gimple s2)
5209 VEC (data_ref_loc, heap) *refs1, *refs2;
5210 data_ref_loc *ref1, *ref2;
5212 get_references_in_stmt (s1, &refs1);
5213 get_references_in_stmt (s2, &refs2);
5215 FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5217 tree base1 = ref_base_address (s1, ref1);
5220 FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5221 if (base1 == ref_base_address (s2, ref2))
5229 VEC_free (data_ref_loc, heap, refs1);
5230 VEC_free (data_ref_loc, heap, refs2);
5234 /* Helper function for the hashtab. */
5237 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5239 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5240 CONST_CAST_GIMPLE ((const_gimple) s2));
5243 /* Helper function for the hashtab. */
5246 ref_base_address_1 (const void *s)
5248 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5250 VEC (data_ref_loc, heap) *refs;
5254 get_references_in_stmt (stmt, &refs);
5256 FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5259 res = htab_hash_pointer (ref_base_address (stmt, ref));
5263 VEC_free (data_ref_loc, heap, refs);
5267 /* Try to remove duplicated write data references from STMTS. */
5270 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5274 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5275 have_similar_memory_accesses_1, NULL);
5277 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5281 slot = htab_find_slot (seen, stmt, INSERT);
5284 VEC_ordered_remove (gimple, *stmts, i);
5287 *slot = (void *) stmt;
5295 /* Returns the index of PARAMETER in the parameters vector of the
5296 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5299 access_matrix_get_index_for_parameter (tree parameter,
5300 struct access_matrix *access_matrix)
5303 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5304 tree lambda_parameter;
5306 FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5307 if (lambda_parameter == parameter)
5308 return i + AM_NB_INDUCTION_VARS (access_matrix);