2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
5 and Sebastian Pop <sebastian.pop@amd.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* This pass performs loop distribution: for example, the loop
40 This pass uses an RDG, Reduced Dependence Graph built on top of the
41 data dependence relations. The RDG is then topologically sorted to
42 obtain a map of information producers/consumers based on which it
43 generates the new loops. */
47 #include "coretypes.h"
48 #include "tree-flow.h"
50 #include "tree-chrec.h"
51 #include "tree-data-ref.h"
52 #include "tree-scalar-evolution.h"
53 #include "tree-pass.h"
/* Pass-global state: both bitmaps below are allocated in ldist_gen and
   freed there once code generation is finished.  */

55 /* If bit I is not set, it means that this node represents an
56 operation that has already been performed, and that should not be
57 performed again. This is the subgraph of remaining important
58 computations that is passed to the DFS algorithm for avoiding to
59 include several times the same stores in different loops. */
60 static bitmap remaining_stmts;
62 /* A node of the RDG is marked in this bitmap when it has as a
63 predecessor a node that writes to memory. */
64 static bitmap upstream_mem_writes;
66 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the loop.  */
70 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
72 imm_use_iterator imm_iter;
/* Any immediate use of DEF in a statement that does not belong to LOOP
   makes the predicate true.  */
75 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
76 if (loop != loop_containing_stmt (USE_STMT (use_p)))
82 /* Returns true when STMT defines a scalar variable used after the
   loop that contains STMT.  */
86 stmt_has_scalar_dependences_outside_loop (gimple stmt)
90 switch (gimple_code (stmt))
/* Assignments define their left-hand side...  */
93 name = gimple_assign_lhs (stmt);
/* ...and PHI nodes define their result.  */
97 name = gimple_phi_result (stmt);
/* Only SSA names carry a use chain we can query.  */
104 return TREE_CODE (name) == SSA_NAME
105 && ssa_name_has_uses_outside_loop_p (name, loop_containing_stmt (stmt));
108 /* Update the PHI nodes of NEW_LOOP. NEW_LOOP is a duplicate of
   ORIG_LOOP: remap the entry and latch arguments of each header PHI
   of the copy.  */
112 update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
115 gimple_stmt_iterator si_new, si_orig;
116 edge orig_loop_latch = loop_latch_edge (orig_loop);
117 edge orig_entry_e = loop_preheader_edge (orig_loop);
118 edge new_loop_entry_e = loop_preheader_edge (new_loop);
120 /* Scan the phis in the headers of the old and new loops
121 (they are organized in exactly the same order). */
122 for (si_new = gsi_start_phis (new_loop->header),
123 si_orig = gsi_start_phis (orig_loop->header);
124 !gsi_end_p (si_new) && !gsi_end_p (si_orig);
125 gsi_next (&si_new), gsi_next (&si_orig))
128 source_location locus;
129 gimple phi_new = gsi_stmt (si_new);
130 gimple phi_orig = gsi_stmt (si_orig);
132 /* Add the first phi argument for the phi in NEW_LOOP (the one
133 associated with the entry of NEW_LOOP) */
134 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
135 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
136 add_phi_arg (phi_new, def, new_loop_entry_e, locus);
138 /* Add the second phi argument for the phi in NEW_LOOP (the one
139 associated with the latch of NEW_LOOP) */
140 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
141 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
/* An SSA latch value must be remapped to its counterpart in the copy.  */
143 if (TREE_CODE (def) == SSA_NAME)
145 new_ssa_name = get_current_def (def);
148 /* This only happens if there are no definitions inside the
149 loop. Use the invariant in the new loop as is. */
153 /* Could be an integer. */
156 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
160 /* Return a copy of LOOP placed before LOOP. */
163 copy_loop_before (struct loop *loop)
166 edge preheader = loop_preheader_edge (loop);
/* Duplicating onto the preheader edge requires a single exit.  */
168 if (!single_exit (loop))
171 initialize_original_copy_tables ();
172 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
173 free_original_copy_tables ();
/* Fix up the copy's PHI nodes and rename its SSA names.  */
178 update_phis_for_loop_copy (loop, res);
179 rename_variables_in_loop (res);
184 /* Creates an empty basic block after LOOP. */
187 create_bb_after_loop (struct loop *loop)
/* NOTE(review): rest of the body is not visible here; presumably the
   new block is created by splitting LOOP's single exit edge -- confirm
   against the full source.  */
189 edge exit = single_exit (loop);
197 /* Generate code for PARTITION from the code in LOOP. The loop is
198 copied when COPY_P is true. All the statements not flagged in the
199 PARTITION bitmap are removed from the loop or from its copy. The
200 statements are indexed in sequence inside a basic block, and the
201 basic blocks of a loop are taken in dom order. Returns true when
202 the code gen succeeded. */
205 generate_loops_for_partition (struct loop *loop, bitmap partition, bool copy_p)
208 gimple_stmt_iterator bsi;
/* When COPY_P, all code is generated in a copy placed before LOOP.  */
213 loop = copy_loop_before (loop);
214 create_preheader (loop, CP_SIMPLE_PREHEADERS);
215 create_bb_after_loop (loop);
221 /* Remove stmts not in the PARTITION bitmap. The order in which we
222 visit the phi nodes and the statements is exactly as in
224 bbs = get_loop_body_in_dom_order (loop);
/* First pass: reset debug uses of the statements about to be removed,
   keeping the running index X in sync with PARTITION's numbering.  */
226 if (MAY_HAVE_DEBUG_STMTS)
227 for (x = 0, i = 0; i < loop->num_nodes; i++)
229 basic_block bb = bbs[i];
231 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
232 if (!bitmap_bit_p (partition, x++))
233 reset_debug_uses (gsi_stmt (bsi));
235 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
237 gimple stmt = gsi_stmt (bsi);
238 if (gimple_code (stmt) != GIMPLE_LABEL
239 && !is_gimple_debug (stmt)
240 && !bitmap_bit_p (partition, x++))
241 reset_debug_uses (stmt);
/* Second pass: actually remove PHIs and statements not flagged in
   PARTITION.  */
245 for (x = 0, i = 0; i < loop->num_nodes; i++)
247 basic_block bb = bbs[i];
249 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
250 if (!bitmap_bit_p (partition, x++))
252 gimple phi = gsi_stmt (bsi);
/* Virtual PHI results must be marked for renaming before removal.  */
253 if (!is_gimple_reg (gimple_phi_result (phi)))
254 mark_virtual_phi_result_for_renaming (phi);
255 remove_phi_node (&bsi, true);
260 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
262 gimple stmt = gsi_stmt (bsi);
263 if (gimple_code (stmt) != GIMPLE_LABEL
264 && !is_gimple_debug (stmt)
265 && !bitmap_bit_p (partition, x++))
267 unlink_stmt_vdef (stmt);
268 gsi_remove (&bsi, true);
280 /* Build the size argument for a memset call. */
283 build_size_arg_loc (location_t loc, tree nb_iter, tree op,
284 gimple_seq *stmt_list)
/* Size in bytes = NB_ITER * sizeof (element type of OP); any statements
   needed to gimplify it are appended to STMT_LIST.  */
287 tree x = fold_build2_loc (loc, MULT_EXPR, size_type_node,
288 fold_convert_loc (loc, size_type_node, nb_iter),
289 fold_convert_loc (loc, size_type_node,
290 TYPE_SIZE_UNIT (TREE_TYPE (op))));
291 x = force_gimple_operand (x, &stmts, true, NULL);
292 gimple_seq_add_seq (stmt_list, stmts);
297 /* Generate a call to memset. Return true when the operation succeeded. */
300 generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
301 gimple_stmt_iterator bsi)
303 tree addr_base, nb_bytes;
305 gimple_seq stmt_list = NULL, stmts;
308 struct data_reference *dr = XCNEW (struct data_reference);
309 location_t loc = gimple_location (stmt);
/* Analyze the access so base/offset/init/step of OP0 are available.  */
313 res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
314 gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
316 nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
317 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
318 addr_base = fold_convert_loc (loc, sizetype, addr_base);
320 /* Test for a negative stride, iterating over every element. */
321 if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
323 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
324 fold_convert_loc (loc, sizetype, nb_bytes));
325 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
326 TYPE_SIZE_UNIT (TREE_TYPE (op0)));
329 addr_base = fold_build_pointer_plus_loc (loc,
330 DR_BASE_ADDRESS (dr), addr_base);
331 mem = force_gimple_operand (addr_base, &stmts, true, NULL);
332 gimple_seq_add_seq (&stmt_list, stmts);
/* Emit memset (mem, 0, nb_bytes) and insert everything after BSI.  */
334 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
335 fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
336 gimple_seq_add_stmt (&stmt_list, fn_call);
337 gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
339 if (dump_file && (dump_flags & TDF_DETAILS))
340 fprintf (dump_file, "generated memset zero\n");
345 /* Tries to generate a builtin function for the instructions of LOOP
346 pointed to by the bits set in PARTITION. Returns true when the
347 operation succeeded. */
350 generate_builtin (struct loop *loop, bitmap partition, bool copy_p)
356 gimple_stmt_iterator bsi;
357 tree nb_iter = number_of_exit_cond_executions (loop);
/* Without a known iteration count no builtin can be generated.  */
359 if (!nb_iter || nb_iter == chrec_dont_know)
362 bbs = get_loop_body_in_dom_order (loop);
364 for (i = 0; i < loop->num_nodes; i++)
366 basic_block bb = bbs[i];
368 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
371 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
373 gimple stmt = gsi_stmt (bsi);
375 if (gimple_code (stmt) == GIMPLE_LABEL
376 || is_gimple_debug (stmt))
379 if (!bitmap_bit_p (partition, x++))
382 /* If the stmt has uses outside of the loop fail. */
383 if (stmt_has_scalar_dependences_outside_loop (stmt))
386 if (is_gimple_assign (stmt)
387 && !is_gimple_reg (gimple_assign_lhs (stmt)))
389 /* Don't generate the builtins when there are more than
/* The latch may execute once less than the exit condition; use the
   latch execution count when the store is in the latch block.  */
395 if (bb == loop->latch)
396 nb_iter = number_of_latch_executions (loop);
/* Only stores of zero with a unit-stride access are recognized.  */
401 if (!stmt_with_adjacent_zero_store_dr_p (write))
404 /* The new statements will be placed before LOOP. */
405 bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
406 generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
409 /* If this is the last partition for which we generate code, we have
410 to destroy the loop. */
413 unsigned nbbs = loop->num_nodes;
414 edge exit = single_exit (loop);
415 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
416 redirect_edge_pred (exit, src);
417 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
418 exit->flags |= EDGE_FALLTHRU;
419 cancel_loop_tree (loop);
420 rescan_loop_exit (exit, false, true);
422 for (i = 0; i < nbbs; i++)
423 delete_basic_block (bbs[i]);
425 set_immediate_dominator (CDI_DOMINATORS, dest,
426 recompute_dominator (CDI_DOMINATORS, dest));
434 /* Generates code for PARTITION. For simple loops, this function can
435 generate a built-in. */
438 generate_code_for_partition (struct loop *loop, bitmap partition, bool copy_p)
/* Try the builtin (memset zero) path first; otherwise fall back to
   generating a distributed loop.  */
440 if (generate_builtin (loop, partition, copy_p))
443 return generate_loops_for_partition (loop, partition, copy_p);
447 /* Returns true if the node V of RDG cannot be recomputed. */
450 rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
/* A memory write cannot be recomputed: re-executing the store would
   duplicate its side effect.  */
452 if (RDG_MEM_WRITE_STMT (rdg, v))
458 /* Returns true when the vertex V has already been generated in the
459 current partition (V is in PROCESSED), or when V belongs to another
460 partition and cannot be recomputed (V is not in REMAINING_STMTS). */
/* Consults the file-static REMAINING_STMTS bitmap maintained by
   rdg_flag_vertex.  */
463 already_processed_vertex_p (bitmap processed, int v)
465 return (bitmap_bit_p (processed, v)
466 || !bitmap_bit_p (remaining_stmts, v));
469 /* Returns NULL when there is no anti-dependence among the successors
470 of vertex V, otherwise returns the edge with the anti-dep. */
472 static struct graph_edge *
473 has_anti_dependence (struct vertex *v)
475 struct graph_edge *e;
/* Scan all successor edges for one typed anti_dd.  */
478 for (e = v->succ; e; e = e->succ_next)
479 if (RDGE_TYPE (e) == anti_dd)
485 /* Returns true when a predecessor of vertex V is marked as having an
   upstream memory write, ignoring flow (write-then-read) channels.  */
488 predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
490 struct graph_edge *e;
493 for (e = v->pred; e; e = e->pred_next)
494 if (bitmap_bit_p (upstream_mem_writes, e->src)
495 /* Don't consider flow channels: a write to memory followed
496 by a read from memory. These channels allow the split of
497 the RDG in different partitions. */
498 && !RDG_MEM_WRITE_STMT (rdg, e->src))
504 /* Initializes the upstream_mem_writes bitmap following the
505 information from RDG. */
508 mark_nodes_having_upstream_mem_writes (struct graph *rdg)
511 bitmap seen = BITMAP_ALLOC (NULL);
/* Visit vertices from last to first; each not-yet-seen vertex seeds a
   DFS over its reachable successors.  */
513 for (v = rdg->n_vertices - 1; v >= 0; v--)
514 if (!bitmap_bit_p (seen, v))
517 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
519 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
521 FOR_EACH_VEC_ELT (int, nodes, i, x)
/* bitmap_set_bit returns false when the bit was already set, i.e. X
   was handled by a previous DFS.  */
523 if (!bitmap_set_bit (seen, x))
526 if (RDG_MEM_WRITE_STMT (rdg, x)
527 || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
528 /* In anti dependences the read should occur before
529 the write, this is why both the read and the write
530 should be placed in the same partition. */
531 || has_anti_dependence (&(rdg->vertices[x])))
533 bitmap_set_bit (upstream_mem_writes, x);
537 VEC_free (int, heap, nodes);
541 /* Returns true when vertex u has a memory write node as a predecessor
   (as precomputed by mark_nodes_having_upstream_mem_writes).  */
545 has_upstream_mem_writes (int u)
547 return bitmap_bit_p (upstream_mem_writes, u);
550 static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap,
553 /* Flag the uses of U stopping following the information from
554 upstream_mem_writes. */
557 rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
558 bitmap processed, bool *part_has_writes)
561 struct vertex *x = &(rdg->vertices[u]);
562 gimple stmt = RDGV_STMT (x);
563 struct graph_edge *anti_dep = has_anti_dependence (x);
565 /* Keep in the same partition the destination of an antidependence,
566 because this is a store to the exact same location. Putting this
567 in another partition is bad for cache locality. */
570 int v = anti_dep->dest;
572 if (!already_processed_vertex_p (processed, v))
573 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
574 processed, part_has_writes);
/* For non-PHI statements, follow the virtual use chain to the defining
   statement and pull it into the partition.  */
577 if (gimple_code (stmt) != GIMPLE_PHI)
579 if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
581 tree use = USE_FROM_PTR (use_p);
583 if (TREE_CODE (use) == SSA_NAME)
585 gimple def_stmt = SSA_NAME_DEF_STMT (use);
586 int v = rdg_vertex_for_stmt (rdg, def_stmt);
589 && !already_processed_vertex_p (processed, v))
590 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
591 processed, part_has_writes);
/* A scalar definition below upstream memory writes drags all its uses
   into the same partition.  */
596 if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
598 tree op0 = gimple_assign_lhs (stmt);
600 /* Scalar channels don't have enough space for transmitting data
601 between tasks, unless we add more storage by privatizing. */
602 if (is_gimple_reg (op0))
605 imm_use_iterator iter;
607 FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
609 int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));
611 if (!already_processed_vertex_p (processed, v))
612 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
613 processed, part_has_writes);
619 /* Flag V from RDG as part of PARTITION, and also flag its loop number
   in LOOPS.  */
623 rdg_flag_vertex (struct graph *rdg, int v, bitmap partition, bitmap loops,
624 bool *part_has_writes)
/* Nothing to do when V is already in PARTITION.  */
628 if (!bitmap_set_bit (partition, v))
631 loop = loop_containing_stmt (RDG_STMT (rdg, v));
632 bitmap_set_bit (loops, loop->num);
/* Non-recomputable vertices (memory writes) are consumed: clear them
   from REMAINING_STMTS and note that the partition has writes.  */
634 if (rdg_cannot_recompute_vertex_p (rdg, v))
636 *part_has_writes = true;
637 bitmap_clear_bit (remaining_stmts, v);
641 /* Flag in the bitmap PARTITION the vertex V and all its predecessors.
642 Also flag their loop number in LOOPS. */
645 rdg_flag_vertex_and_dependent (struct graph *rdg, int v, bitmap partition,
646 bitmap loops, bitmap processed,
647 bool *part_has_writes)
650 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
653 bitmap_set_bit (processed, v);
654 rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes);
/* DFS restricted to REMAINING_STMTS gathers the vertices V depends on.  */
655 graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
656 rdg_flag_vertex (rdg, v, partition, loops, part_has_writes);
/* Recurse on each collected vertex not yet processed.  */
658 FOR_EACH_VEC_ELT (int, nodes, i, x)
659 if (!already_processed_vertex_p (processed, x))
660 rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed,
663 VEC_free (int, heap, nodes);
666 /* Initialize CONDS with all the condition statements from the basic
   blocks that end in the exit edges of LOOP.  */
670 collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
674 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
676 FOR_EACH_VEC_ELT (edge, exits, i, e)
/* The exit condition is the last statement of the exit edge's source.  */
678 gimple cond = last_stmt (e->src);
681 VEC_safe_push (gimple, heap, *conds, cond);
684 VEC_free (edge, heap, exits);
687 /* Add to PARTITION all the exit condition statements for LOOPS
688 together with all their dependent statements determined from
692 rdg_flag_loop_exits (struct graph *rdg, bitmap loops, bitmap partition,
693 bitmap processed, bool *part_has_writes)
697 VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);
699 EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
700 collect_condition_stmts (get_loop (i), &conds);
/* Worklist loop: flagging a condition's dependences may pull in
   statements from new loops; keep collecting their exit conditions
   until a fixed point is reached.  */
702 while (!VEC_empty (gimple, conds))
704 gimple cond = VEC_pop (gimple, conds);
705 int v = rdg_vertex_for_stmt (rdg, cond);
706 bitmap new_loops = BITMAP_ALLOC (NULL);
708 if (!already_processed_vertex_p (processed, v))
709 rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed,
712 EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
713 if (bitmap_set_bit (loops, i))
714 collect_condition_stmts (get_loop (i), &conds);
716 BITMAP_FREE (new_loops);
719 VEC_free (gimple, heap, conds);
722 /* Returns a bitmap in which all the statements needed for computing
723 the strongly connected component C of the RDG are flagged, also
724 including the loop exit conditions. */
727 build_rdg_partition_for_component (struct graph *rdg, rdgc c,
728 bool *part_has_writes)
731 bitmap partition = BITMAP_ALLOC (NULL);
732 bitmap loops = BITMAP_ALLOC (NULL);
733 bitmap processed = BITMAP_ALLOC (NULL);
/* Flag every vertex of the component together with its dependences.  */
735 FOR_EACH_VEC_ELT (int, c->vertices, i, v)
736 if (!already_processed_vertex_p (processed, v))
737 rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
/* Also pull in the exit conditions of all the flagged loops.  */
740 rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
742 BITMAP_FREE (processed);
747 /* Free memory for COMPONENTS. */
750 free_rdg_components (VEC (rdgc, heap) *components)
/* Release each component's vertex vector, then the vector of
   components itself.  */
755 FOR_EACH_VEC_ELT (rdgc, components, i, x)
757 VEC_free (int, heap, x->vertices);
761 VEC_free (rdgc, heap, components);
764 /* Build the COMPONENTS vector with the strongly connected components
765 of RDG in which the STARTING_VERTICES occur. */
768 rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
769 VEC (rdgc, heap) **components)
772 bitmap saved_components = BITMAP_ALLOC (NULL);
773 int n_components = graphds_scc (rdg, NULL);
774 VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);
776 for (i = 0; i < n_components; i++)
777 all_components[i] = VEC_alloc (int, heap, 3);
/* Bucket every vertex into its strongly connected component.  */
779 for (i = 0; i < rdg->n_vertices; i++)
780 VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);
/* Emit one rdg_component per SCC that contains a starting vertex; the
   SAVED_COMPONENTS bitmap prevents duplicates.  */
782 FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
784 int c = rdg->vertices[v].component;
786 if (bitmap_set_bit (saved_components, c))
788 rdgc x = XCNEW (struct rdg_component);
790 x->vertices = all_components[c];
792 VEC_safe_push (rdgc, heap, *components, x);
/* Vertex vectors handed over to a component above must not be freed
   here; free only the unsaved ones.  */
796 for (i = 0; i < n_components; i++)
797 if (!bitmap_bit_p (saved_components, i))
798 VEC_free (int, heap, all_components[i]);
800 free (all_components);
801 BITMAP_FREE (saved_components);
804 /* Returns true when it is possible to generate a builtin pattern for
805 the PARTITION of RDG. For the moment we detect only the memset
   zero pattern.  */
809 can_generate_builtin (struct graph *rdg, bitmap partition)
817 EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, bi)
818 if (RDG_MEM_READS_STMT (rdg, i))
820 else if (RDG_MEM_WRITE_STMT (rdg, i))
823 if (stmt_with_adjacent_zero_store_dr_p (RDG_STMT (rdg, i)))
/* memset zero requires exactly one write, which stores zero, and no
   reads at all.  */
827 return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
830 /* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */
834 similar_memory_accesses (struct graph *rdg, bitmap partition1,
838 bitmap_iterator bi, bj;
/* Compare every memory access of PARTITION1 against every memory
   access of PARTITION2.  */
840 EXECUTE_IF_SET_IN_BITMAP (partition1, 0, i, bi)
841 if (RDG_MEM_WRITE_STMT (rdg, i)
842 || RDG_MEM_READS_STMT (rdg, i))
843 EXECUTE_IF_SET_IN_BITMAP (partition2, 0, j, bj)
844 if (RDG_MEM_WRITE_STMT (rdg, j)
845 || RDG_MEM_READS_STMT (rdg, j))
846 if (rdg_has_similar_memory_accesses (rdg, i, j))
852 /* Fuse all the partitions from PARTITIONS that contain similar memory
853 references, i.e., we're taking care of cache locality. This
854 function does not fuse those partitions that contain patterns that
855 can be code generated with builtins. */
858 fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
859 VEC (bitmap, heap) **partitions)
862 bitmap partition1, partition2;
864 FOR_EACH_VEC_ELT (bitmap, *partitions, p1, partition1)
865 if (!can_generate_builtin (rdg, partition1))
866 FOR_EACH_VEC_ELT (bitmap, *partitions, p2, partition2)
868 && !can_generate_builtin (rdg, partition2)
869 && similar_memory_accesses (rdg, partition1, partition2))
/* Merge PARTITION2 into PARTITION1 and drop it from the vector.  */
871 bitmap_ior_into (partition1, partition2);
872 VEC_ordered_remove (bitmap, *partitions, p2);
877 /* Returns true when STMT will be code generated in a partition of RDG
878 different than PART and that will not be code generated as a
   builtin.  */
882 stmt_generated_in_another_partition (struct graph *rdg, gimple stmt, int part,
883 VEC (bitmap, heap) *partitions)
/* Look for STMT in every non-builtin partition other than PART.  */
890 FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
892 && !can_generate_builtin (rdg, pp))
893 EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
894 if (stmt == RDG_STMT (rdg, i))
900 /* For each partition in PARTITIONS that will be code generated using
901 a builtin, add its scalar computations used after the loop to
905 add_scalar_computations_to_partition (struct graph *rdg,
906 VEC (bitmap, heap) *partitions,
913 bitmap l = BITMAP_ALLOC (NULL);
914 bitmap pr = BITMAP_ALLOC (NULL);
/* Builtin-generated partitions lose their scalar defs; any such def
   used outside the loop and not generated elsewhere must be added to
   PARTITION.  */
917 FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
918 if (can_generate_builtin (rdg, pp))
919 EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
920 if (stmt_has_scalar_dependences_outside_loop (RDG_STMT (rdg, i))
921 && !stmt_generated_in_another_partition (rdg, RDG_STMT (rdg, i), p,
923 rdg_flag_vertex_and_dependent (rdg, i, partition, l, pr, &f);
925 rdg_flag_loop_exits (rdg, l, partition, pr, &f);
931 /* Aggregate several components into a useful partition that is
932 registered in the PARTITIONS vector. Partitions will be
933 distributed in different loops. */
936 rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
937 VEC (int, heap) **other_stores,
938 VEC (bitmap, heap) **partitions, bitmap processed)
942 bitmap partition = BITMAP_ALLOC (NULL);
944 FOR_EACH_VEC_ELT (rdgc, components, i, x)
947 bool part_has_writes = false;
948 int v = VEC_index (int, x->vertices, 0);
/* Skip components whose representative was already assigned.  */
950 if (bitmap_bit_p (processed, v))
953 np = build_rdg_partition_for_component (rdg, x, &part_has_writes);
954 bitmap_ior_into (partition, np);
955 bitmap_ior_into (processed, np);
960 if (dump_file && (dump_flags & TDF_DETAILS))
962 fprintf (dump_file, "ldist useful partition:\n");
963 dump_bitmap (dump_file, partition);
/* The accumulated partition is saved and a fresh one is started.  */
966 VEC_safe_push (bitmap, heap, *partitions, partition);
967 partition = BITMAP_ALLOC (NULL);
971 /* Add the nodes from the RDG that were not marked as processed, and
972 that are used outside the current loop. These are scalar
973 computations that are not yet part of previous partitions. */
974 for (i = 0; i < rdg->n_vertices; i++)
975 if (!bitmap_bit_p (processed, i)
976 && rdg_defs_used_in_other_loops_p (rdg, i))
977 VEC_safe_push (int, heap, *other_stores, i);
979 /* If there are still statements left in the OTHER_STORES array,
980 create other components and partitions with these stores and
981 their dependences. */
982 if (VEC_length (int, *other_stores) > 0)
984 VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
985 VEC (int, heap) *foo = VEC_alloc (int, heap, 3);
987 rdg_build_components (rdg, *other_stores, &comps);
988 rdg_build_partitions (rdg, comps, &foo, partitions, processed);
990 VEC_free (int, heap, foo);
991 free_rdg_components (comps);
/* Builtin partitions may leave live scalar computations behind; pull
   them into the current partition.  */
994 add_scalar_computations_to_partition (rdg, *partitions, partition);
996 /* If there is something left in the last partition, save it. */
997 if (bitmap_count_bits (partition) > 0)
998 VEC_safe_push (bitmap, heap, *partitions, partition);
1000 BITMAP_FREE (partition);
1002 fuse_partitions_with_similar_memory_accesses (rdg, partitions);
1005 /* Dump to FILE the PARTITIONS. */
1008 dump_rdg_partitions (FILE *file, VEC (bitmap, heap) *partitions)
/* Print each partition's statement bitmap.  */
1013 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1014 debug_bitmap_file (file, partition)
1017 /* Debug PARTITIONS. */
/* Declared extern (not static) so it is callable from a debugger.  */
1018 extern void debug_rdg_partitions (VEC (bitmap, heap) *);
1021 debug_rdg_partitions (VEC (bitmap, heap) *partitions)
1023 dump_rdg_partitions (stderr, partitions);
1026 /* Returns the number of read and write operations in the RDG. */
1029 number_of_rw_in_rdg (struct graph *rdg)
/* Count memory reads and writes over all vertices.  */
1033 for (i = 0; i < rdg->n_vertices; i++)
1035 if (RDG_MEM_WRITE_STMT (rdg, i))
1038 if (RDG_MEM_READS_STMT (rdg, i))
1045 /* Returns the number of read and write operations in a PARTITION of
   the RDG.  */
1049 number_of_rw_in_partition (struct graph *rdg, bitmap partition)
/* Same counting as number_of_rw_in_rdg, restricted to PARTITION.  */
1055 EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
1057 if (RDG_MEM_WRITE_STMT (rdg, i))
1060 if (RDG_MEM_READS_STMT (rdg, i))
1067 /* Returns true when one of the PARTITIONS contains all the read or
1068 write operations of RDG. */
1071 partition_contains_all_rw (struct graph *rdg, VEC (bitmap, heap) *partitions)
1075 int nrw = number_of_rw_in_rdg (rdg);
/* Distribution is pointless if one partition already holds every
   memory operation of the loop.  */
1077 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1078 if (nrw == number_of_rw_in_partition (rdg, partition))
1084 /* Generate code from STARTING_VERTICES in RDG. Returns the number of
1085 distributed loops. */
1088 ldist_gen (struct loop *loop, struct graph *rdg,
1089 VEC (int, heap) *starting_vertices)
1092 VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
1093 VEC (bitmap, heap) *partitions = VEC_alloc (bitmap, heap, 3);
1094 VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
1095 bitmap partition, processed = BITMAP_ALLOC (NULL);
/* Initialize the file-static bitmaps used throughout partitioning.  */
1097 remaining_stmts = BITMAP_ALLOC (NULL);
1098 upstream_mem_writes = BITMAP_ALLOC (NULL);
/* Initially every statement remains to be generated.  */
1100 for (i = 0; i < rdg->n_vertices; i++)
1102 bitmap_set_bit (remaining_stmts, i);
1104 /* Save in OTHER_STORES all the memory writes that are not in
1105 STARTING_VERTICES. */
1106 if (RDG_MEM_WRITE_STMT (rdg, i))
1112 FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
1120 VEC_safe_push (int, heap, other_stores, i);
1124 mark_nodes_having_upstream_mem_writes (rdg);
1125 rdg_build_components (rdg, starting_vertices, &components);
1126 rdg_build_partitions (rdg, components, &other_stores, &partitions,
1128 BITMAP_FREE (processed);
1129 nbp = VEC_length (bitmap, partitions);
/* Give up when distribution would be useless.  */
1132 || partition_contains_all_rw (rdg, partitions))
1135 if (dump_file && (dump_flags & TDF_DETAILS))
1136 dump_rdg_partitions (dump_file, partitions);
/* Generate code for each partition; only the last one reuses LOOP
   itself (COPY_P is false for it).  */
1138 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1139 if (!generate_code_for_partition (loop, partition, i < nbp - 1))
/* The CFG and SSA form changed; restore loop-closed SSA and virtuals.  */
1142 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1143 update_ssa (TODO_update_ssa_only_virtuals | TODO_update_ssa);
1147 BITMAP_FREE (remaining_stmts);
1148 BITMAP_FREE (upstream_mem_writes);
1150 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1151 BITMAP_FREE (partition);
1153 VEC_free (int, heap, other_stores);
1154 VEC_free (bitmap, heap, partitions);
1155 free_rdg_components (components);
1159 /* Distributes the code from LOOP in such a way that producer
1160 statements are placed before consumer statements. When STMTS is
1161 NULL, performs the maximal distribution, if STMTS is not NULL,
1162 tries to separate only these statements from the LOOP's body.
1163 Returns the number of distributed loops. */
1166 distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
1172 VEC (int, heap) *vertices;
1173 VEC (ddr_p, heap) *dependence_relations;
1174 VEC (data_reference_p, heap) *datarefs;
1175 VEC (loop_p, heap) *loop_nest;
/* Only loops of at most two basic blocks are handled for now.  */
1177 if (loop->num_nodes > 2)
1179 if (dump_file && (dump_flags & TDF_DETAILS))
1181 "FIXME: Loop %d not distributed: it has more than two basic blocks.\n",
/* Build the reduced dependence graph from the loop's data references.  */
1187 datarefs = VEC_alloc (data_reference_p, heap, 10);
1188 dependence_relations = VEC_alloc (ddr_p, heap, 100);
1189 loop_nest = VEC_alloc (loop_p, heap, 3);
1190 rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);
1194 if (dump_file && (dump_flags & TDF_DETAILS))
1196 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1199 free_dependence_relations (dependence_relations);
1200 free_data_refs (datarefs);
1201 VEC_free (loop_p, heap, loop_nest);
1205 vertices = VEC_alloc (int, heap, 3);
1207 if (dump_file && (dump_flags & TDF_DETAILS))
1208 dump_rdg (dump_file, rdg);
/* Map the requested statements to their RDG vertices.  */
1210 FOR_EACH_VEC_ELT (gimple, stmts, i, s)
1212 int v = rdg_vertex_for_stmt (rdg, s);
1216 VEC_safe_push (int, heap, vertices, v);
1218 if (dump_file && (dump_flags & TDF_DETAILS))
1220 "ldist asked to generate code for vertex %d\n", v);
1224 res = ldist_gen (loop, rdg, vertices);
1225 VEC_free (int, heap, vertices);
1227 free_dependence_relations (dependence_relations);
1228 free_data_refs (datarefs);
1229 VEC_free (loop_p, heap, loop_nest);
1233 /* Distribute all loops in the current function. */
1236 tree_loop_distribution (void)
1240 int nb_generated_loops = 0;
1242 FOR_EACH_LOOP (li, loop, 0)
1244 VEC (gimple, heap) *work_list = NULL;
1245 int num = loop->num;
1247 /* If the loop doesn't have a single exit we will fail anyway,
1248 so do that early. */
1249 if (!single_exit (loop))
1252 /* If both flag_tree_loop_distribute_patterns and
1253 flag_tree_loop_distribution are set, then only
1254 distribute_patterns is executed. */
1255 if (flag_tree_loop_distribute_patterns)
1257 /* With the following working list, we're asking
1258 distribute_loop to separate from the rest of the loop the
1259 stores of the form "A[i] = 0". */
1260 stores_zero_from_loop (loop, &work_list);
1262 /* Do nothing if there are no patterns to be distributed. */
1263 if (VEC_length (gimple, work_list) > 0)
1264 nb_generated_loops = distribute_loop (loop, work_list);
1266 else if (flag_tree_loop_distribution)
1268 /* With the following working list, we're asking
1269 distribute_loop to separate the stores of the loop: when
1270 dependences allow, it will end on having one store per
1272 stores_from_loop (loop, &work_list);
1274 /* A simple heuristic for cache locality is to not split
1275 stores to the same array. Without this call, an unrolled
1276 loop would be split into as many loops as unroll factor,
1277 each loop storing in the same array. */
1278 remove_similar_memory_refs (&work_list);
1280 nb_generated_loops = distribute_loop (loop, work_list);
/* Report the outcome for this loop.  */
1283 if (dump_file && (dump_flags & TDF_DETAILS))
1285 if (nb_generated_loops > 1)
1286 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
1287 num, nb_generated_loops);
1289 fprintf (dump_file, "Loop %d is the same.\n", num);
1292 verify_loop_structure ();
1294 VEC_free (gimple, heap, work_list);
/* Gate: the pass runs when either distribution flag is enabled.  */
1301 gate_tree_loop_distribution (void)
1303 return flag_tree_loop_distribution
1304 || flag_tree_loop_distribute_patterns;
/* Pass descriptor for -ftree-loop-distribution and
   -ftree-loop-distribute-patterns.  */
1307 struct gimple_opt_pass pass_loop_distribution =
1312 gate_tree_loop_distribution, /* gate */
1313 tree_loop_distribution, /* execute */
1316 0, /* static_pass_number */
1317 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1318 PROP_cfg | PROP_ssa, /* properties_required */
1319 0, /* properties_provided */
1320 0, /* properties_destroyed */
1321 0, /* todo_flags_start */
1323 | TODO_verify_ssa /* todo_flags_finish */