2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
5 and Sebastian Pop <sebastian.pop@amd.com>.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* This pass performs loop distribution: for example, the loop
40 This pass uses an RDG, Reduced Dependence Graph built on top of the
41 data dependence relations. The RDG is then topologically sorted to
42 obtain a map of information producers/consumers based on which it
43 generates the new loops. */
47 #include "coretypes.h"
48 #include "tree-flow.h"
50 #include "tree-chrec.h"
51 #include "tree-data-ref.h"
52 #include "tree-scalar-evolution.h"
53 #include "tree-pass.h"
55 /* If bit I is not set, it means that this node represents an
56 operation that has already been performed, and that should not be
57 performed again. This is the subgraph of remaining important
58 computations that is passed to the DFS algorithm for avoiding to
59 include several times the same stores in different loops. */
60 static bitmap remaining_stmts;
62 /* A node of the RDG is marked in this bitmap when it has as a
63 predecessor a node that writes to memory. */
64 static bitmap upstream_mem_writes;
66 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
/* NOTE(review): this extraction omits interior lines (return type,
   braces and return statements); verify against the complete file.  */
70 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
72 imm_use_iterator imm_iter;
/* Walk every immediate use of DEF; a use whose containing statement
   is in a different loop than LOOP means DEF escapes LOOP.  */
75 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
76 if (loop != loop_containing_stmt (USE_STMT (use_p)))
82 /* Returns true when STMT defines a scalar variable used after the
/* NOTE(review): extraction gaps — the switch's case labels, default
   handling and part of the return expression are missing here.  */
86 stmt_has_scalar_dependences_outside_loop (gimple stmt)
90 switch (gimple_code (stmt))
/* Assignment/call-like statements: the defined value is the LHS.  */
94 name = gimple_get_lhs (stmt);
/* PHI nodes: the defined value is the PHI result.  */
98 name = gimple_phi_result (stmt);
/* Only SSA names can have (trackable) uses outside the loop.  */
106 && TREE_CODE (name) == SSA_NAME
107 && ssa_name_has_uses_outside_loop_p (name,
108 loop_containing_stmt (stmt)));
111 /* Update the PHI nodes of NEW_LOOP. NEW_LOOP is a duplicate of
/* NOTE(review): extraction gaps — braces, some declarations (DEF,
   NEW_SSA_NAME) and else-branches are missing from this view.  */
115 update_phis_for_loop_copy (struct loop *orig_loop, struct loop *new_loop)
118 gimple_stmt_iterator si_new, si_orig;
119 edge orig_loop_latch = loop_latch_edge (orig_loop);
120 edge orig_entry_e = loop_preheader_edge (orig_loop);
121 edge new_loop_entry_e = loop_preheader_edge (new_loop);
123 /* Scan the phis in the headers of the old and new loops
124 (they are organized in exactly the same order). */
125 for (si_new = gsi_start_phis (new_loop->header),
126 si_orig = gsi_start_phis (orig_loop->header);
127 !gsi_end_p (si_new) && !gsi_end_p (si_orig);
128 gsi_next (&si_new), gsi_next (&si_orig))
131 source_location locus;
132 gimple phi_new = gsi_stmt (si_new);
133 gimple phi_orig = gsi_stmt (si_orig);
135 /* Add the first phi argument for the phi in NEW_LOOP (the one
136 associated with the entry of NEW_LOOP) */
137 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_entry_e);
138 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_entry_e);
139 add_phi_arg (phi_new, def, new_loop_entry_e, locus);
141 /* Add the second phi argument for the phi in NEW_LOOP (the one
142 associated with the latch of NEW_LOOP) */
143 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
144 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
/* Map the original latch value to its counterpart in the copy.  */
146 if (TREE_CODE (def) == SSA_NAME)
148 new_ssa_name = get_current_def (def);
151 /* This only happens if there are no definitions inside the
152 loop. Use the invariant in the new loop as is. */
156 /* Could be an integer. */
159 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
163 /* Return a copy of LOOP placed before LOOP. */
/* NOTE(review): extraction gaps — return type, RES declaration, the
   early-return for multi-exit loops and the final return are missing.  */
166 copy_loop_before (struct loop *loop)
169 edge preheader = loop_preheader_edge (loop);
/* Duplication requires a single-exit loop.  */
171 if (!single_exit (loop))
174 initialize_original_copy_tables ();
175 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
176 free_original_copy_tables ();
/* Fix up the copy's PHIs and rename its SSA variables.  */
181 update_phis_for_loop_copy (loop, res);
182 rename_variables_in_loop (res);
187 /* Creates an empty basic block after LOOP. */
/* NOTE(review): body largely elided; only the exit-edge lookup is
   visible — presumably followed by a split of that edge.  TODO confirm.  */
190 create_bb_after_loop (struct loop *loop)
192 edge exit = single_exit (loop);
200 /* Generate code for PARTITION from the code in LOOP. The loop is
201 copied when COPY_P is true. All the statements not flagged in the
202 PARTITION bitmap are removed from the loop or from its copy. The
203 statements are indexed in sequence inside a basic block, and the
204 basic blocks of a loop are taken in dom order. Returns true when
205 the code gen succeeded. */
/* NOTE(review): extraction gaps — declarations (X, I, BBS), braces,
   gsi_next advancement in the removal loops and the returns are
   missing from this view.  */
208 generate_loops_for_partition (struct loop *loop, bitmap partition, bool copy_p)
211 gimple_stmt_iterator bsi;
/* Work on a fresh copy placed before LOOP when requested.  */
216 loop = copy_loop_before (loop);
217 create_preheader (loop, CP_SIMPLE_PREHEADERS);
218 create_bb_after_loop (loop);
224 /* Remove stmts not in the PARTITION bitmap. The order in which we
225 visit the phi nodes and the statements is exactly as in
227 bbs = get_loop_body_in_dom_order (loop);
/* First pass: reset debug uses of everything that will be removed,
   so debug info does not reference deleted statements.  */
229 if (MAY_HAVE_DEBUG_STMTS)
230 for (x = 0, i = 0; i < loop->num_nodes; i++)
232 basic_block bb = bbs[i];
234 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
235 if (!bitmap_bit_p (partition, x++))
236 reset_debug_uses (gsi_stmt (bsi));
238 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
240 gimple stmt = gsi_stmt (bsi);
/* Labels and debug statements are not indexed in the partition.  */
241 if (gimple_code (stmt) != GIMPLE_LABEL
242 && !is_gimple_debug (stmt)
243 && !bitmap_bit_p (partition, x++))
244 reset_debug_uses (stmt);
/* Second pass: actually delete PHIs and statements outside the
   partition; indexing must mirror the pass above exactly.  */
248 for (x = 0, i = 0; i < loop->num_nodes; i++)
250 basic_block bb = bbs[i];
252 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
253 if (!bitmap_bit_p (partition, x++))
255 gimple phi = gsi_stmt (bsi);
256 if (!is_gimple_reg (gimple_phi_result (phi)))
257 mark_virtual_phi_result_for_renaming (phi);
258 remove_phi_node (&bsi, true);
263 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
265 gimple stmt = gsi_stmt (bsi);
266 if (gimple_code (stmt) != GIMPLE_LABEL
267 && !is_gimple_debug (stmt)
268 && !bitmap_bit_p (partition, x++))
270 unlink_stmt_vdef (stmt);
271 gsi_remove (&bsi, true);
283 /* Build the size argument for a memset call. */
/* NOTE(review): return type, STMTS declaration and the final return
   of X are elided.  Size = NB_ITER * sizeof (element type of OP).  */
286 build_size_arg_loc (location_t loc, tree nb_iter, tree op,
287 gimple_seq *stmt_list)
290 tree x = size_binop_loc (loc, MULT_EXPR,
291 fold_convert_loc (loc, sizetype, nb_iter),
292 TYPE_SIZE_UNIT (TREE_TYPE (op)));
/* Gimplify the size expression and append any helper statements.  */
293 x = force_gimple_operand (x, &stmts, true, NULL);
294 gimple_seq_add_seq (stmt_list, stmts);
299 /* Generate a call to memset. Return true when the operation succeeded. */
/* NOTE(review): extraction gaps — declarations (MEM, FN, FN_CALL, RES),
   DR setup before dr_analyze_innermost, and the function's return are
   missing from this view.  */
302 generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
303 gimple_stmt_iterator bsi)
305 tree addr_base, nb_bytes;
307 gimple_seq stmt_list = NULL, stmts;
310 struct data_reference *dr = XCNEW (struct data_reference);
311 location_t loc = gimple_location (stmt);
/* The store must have a unit stride (positive or negative) for a
   memset to cover exactly the touched bytes.  */
315 res = dr_analyze_innermost (dr);
316 gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
318 nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
319 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
320 addr_base = fold_convert_loc (loc, sizetype, addr_base);
322 /* Test for a negative stride, iterating over every element. */
323 if (integer_zerop (size_binop (PLUS_EXPR,
324 TYPE_SIZE_UNIT (TREE_TYPE (op0)),
325 fold_convert (sizetype, DR_STEP (dr)))))
/* Negative stride: rebase the start address to the lowest byte
   written, i.e. base - nb_bytes + element size.  */
327 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
328 fold_convert_loc (loc, sizetype, nb_bytes));
329 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
330 TYPE_SIZE_UNIT (TREE_TYPE (op0)));
333 addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
334 TREE_TYPE (DR_BASE_ADDRESS (dr)),
335 DR_BASE_ADDRESS (dr), addr_base);
336 mem = force_gimple_operand (addr_base, &stmts, true, NULL);
337 gimple_seq_add_seq (&stmt_list, stmts);
/* Emit memset (mem, 0, nb_bytes) after BSI.  */
339 fn = build_fold_addr_expr (implicit_built_in_decls [BUILT_IN_MEMSET]);
340 fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
341 gimple_seq_add_stmt (&stmt_list, fn_call);
342 gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
344 if (dump_file && (dump_flags & TDF_DETAILS))
345 fprintf (dump_file, "generated memset zero\n");
350 /* Tries to generate a builtin function for the instructions of LOOP
351 pointed to by the bits set in PARTITION. Returns true when the
352 operation succeeded. */
/* NOTE(review): extraction gaps — declarations (BBS, I, X, WRITE),
   several early returns/gotos, the WRITE assignment, and the cleanup
   path are missing from this view.  */
355 generate_builtin (struct loop *loop, bitmap partition, bool copy_p)
361 gimple_stmt_iterator bsi;
362 tree nb_iter = number_of_exit_cond_executions (loop);
/* Bail out when the iteration count is unknown.  */
364 if (!nb_iter || nb_iter == chrec_dont_know)
367 bbs = get_loop_body_in_dom_order (loop);
/* Scan the partition's statements looking for a single zero-store.  */
369 for (i = 0; i < loop->num_nodes; i++)
371 basic_block bb = bbs[i];
373 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
376 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
378 gimple stmt = gsi_stmt (bsi);
380 if (gimple_code (stmt) == GIMPLE_LABEL
381 || is_gimple_debug (stmt))
384 if (!bitmap_bit_p (partition, x++))
387 /* If the stmt has uses outside of the loop fail. */
388 if (stmt_has_scalar_dependences_outside_loop (stmt))
391 if (is_gimple_assign (stmt)
392 && !is_gimple_reg (gimple_assign_lhs (stmt)))
394 /* Don't generate the builtins when there are more than
/* In the latch, the latch-execution count is the right size.  */
400 if (bb == loop->latch)
401 nb_iter = number_of_latch_executions (loop);
/* Only "A[i] = 0" stores with adjacent accesses map to memset.  */
406 if (!stmt_with_adjacent_zero_store_dr_p (write))
409 /* The new statements will be placed before LOOP. */
410 bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
411 generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
414 /* If this is the last partition for which we generate code, we have
415 to destroy the loop. */
418 unsigned nbbs = loop->num_nodes;
419 edge exit = single_exit (loop);
420 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
/* Short-circuit the loop: redirect the exit edge to come straight
   from the preheader's source, then delete the loop body.  */
421 redirect_edge_pred (exit, src);
422 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
423 exit->flags |= EDGE_FALLTHRU;
424 cancel_loop_tree (loop);
425 rescan_loop_exit (exit, false, true);
427 for (i = 0; i < nbbs; i++)
428 delete_basic_block (bbs[i]);
430 set_immediate_dominator (CDI_DOMINATORS, dest,
431 recompute_dominator (CDI_DOMINATORS, dest));
439 /* Generates code for PARTITION. For simple loops, this function can
440 generate a built-in. */
/* Try the builtin path first; fall back to loop distribution proper.
   NOTE(review): return type and the true-branch body are elided.  */
443 generate_code_for_partition (struct loop *loop, bitmap partition, bool copy_p)
445 if (generate_builtin (loop, partition, copy_p))
448 return generate_loops_for_partition (loop, partition, copy_p);
452 /* Returns true if the node V of RDG cannot be recomputed. */
/* NOTE(review): body mostly elided; memory writes are (at least one of)
   the non-recomputable cases.  */
455 rdg_cannot_recompute_vertex_p (struct graph *rdg, int v)
457 if (RDG_MEM_WRITE_STMT (rdg, v))
463 /* Returns true when the vertex V has already been generated in the
464 current partition (V is in PROCESSED), or when V belongs to another
465 partition and cannot be recomputed (V is not in REMAINING_STMTS). */
/* Reads the file-scope REMAINING_STMTS bitmap.  */
468 already_processed_vertex_p (bitmap processed, int v)
470 return (bitmap_bit_p (processed, v)
471 || !bitmap_bit_p (remaining_stmts, v));
474 /* Returns NULL when there is no anti-dependence among the successors
475 of vertex V, otherwise returns the edge with the anti-dep. */
/* NOTE(review): the returns inside/after the loop are elided.  */
477 static struct graph_edge *
478 has_anti_dependence (struct vertex *v)
480 struct graph_edge *e;
/* Linear scan of the successor edge list for an anti_dd edge.  */
483 for (e = v->succ; e; e = e->succ_next)
484 if (RDGE_TYPE (e) == anti_dd)
490 /* Returns true when V has an anti-dependence edge among its successors. */
/* NOTE(review): the comment above does not match this function — the
   code scans PREDECESSORS for upstream memory writes, not successors
   for anti-deps.  The correct header comment was likely lost in this
   extraction; verify against the complete file.  */
493 predecessor_has_mem_write (struct graph *rdg, struct vertex *v)
495 struct graph_edge *e;
498 for (e = v->pred; e; e = e->pred_next)
499 if (bitmap_bit_p (upstream_mem_writes, e->src)
500 /* Don't consider flow channels: a write to memory followed
501 by a read from memory. These channels allow the split of
502 the RDG in different partitions. */
503 && !RDG_MEM_WRITE_STMT (rdg, e->src))
509 /* Initializes the upstream_mem_writes bitmap following the
510 information from RDG. */
/* NOTE(review): declarations (V, I, X), braces and the BITMAP_FREE of
   SEEN are elided from this view.  */
513 mark_nodes_having_upstream_mem_writes (struct graph *rdg)
516 bitmap seen = BITMAP_ALLOC (NULL);
/* Visit vertices in reverse order; each unseen vertex seeds a DFS
   whose reachable set is then classified.  */
518 for (v = rdg->n_vertices - 1; v >= 0; v--)
519 if (!bitmap_bit_p (seen, v))
522 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
524 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
526 FOR_EACH_VEC_ELT (int, nodes, i, x)
/* bitmap_set_bit returns false when the bit was already set,
   i.e. X was seen before — skip it.  */
528 if (!bitmap_set_bit (seen, x))
531 if (RDG_MEM_WRITE_STMT (rdg, x)
532 || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
533 /* In anti dependences the read should occur before
534 the write, this is why both the read and the write
535 should be placed in the same partition. */
536 || has_anti_dependence (&(rdg->vertices[x])))
538 bitmap_set_bit (upstream_mem_writes, x);
542 VEC_free (int, heap, nodes);
546 /* Returns true when vertex u has a memory write node as a predecessor
/* Simple lookup in the bitmap precomputed by
   mark_nodes_having_upstream_mem_writes.  */
550 has_upstream_mem_writes (int u)
552 return bitmap_bit_p (upstream_mem_writes, u);
555 static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap,
558 /* Flag the uses of U stopping following the information from
559 upstream_mem_writes. */
/* NOTE(review): extraction gaps — USE_P declaration, braces and some
   guard conditions are missing from this view.  */
562 rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
563 bitmap processed, bool *part_has_writes)
566 struct vertex *x = &(rdg->vertices[u]);
567 gimple stmt = RDGV_STMT (x);
568 struct graph_edge *anti_dep = has_anti_dependence (x);
570 /* Keep in the same partition the destination of an antidependence,
571 because this is a store to the exact same location. Putting this
572 in another partition is bad for cache locality. */
575 int v = anti_dep->dest;
577 if (!already_processed_vertex_p (processed, v))
578 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
579 processed, part_has_writes);
/* Non-PHI statements: follow the virtual use back to its defining
   statement and pull that vertex into the partition too.  */
582 if (gimple_code (stmt) != GIMPLE_PHI)
584 if ((use_p = gimple_vuse_op (stmt)) != NULL_USE_OPERAND_P)
586 tree use = USE_FROM_PTR (use_p);
588 if (TREE_CODE (use) == SSA_NAME)
590 gimple def_stmt = SSA_NAME_DEF_STMT (use);
591 int v = rdg_vertex_for_stmt (rdg, def_stmt);
594 && !already_processed_vertex_p (processed, v))
595 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
596 processed, part_has_writes);
601 if (is_gimple_assign (stmt) && has_upstream_mem_writes (u))
603 tree op0 = gimple_assign_lhs (stmt);
605 /* Scalar channels don't have enough space for transmitting data
606 between tasks, unless we add more storage by privatizing. */
607 if (is_gimple_reg (op0))
610 imm_use_iterator iter;
/* Every consumer of the scalar result must live in this partition.  */
612 FOR_EACH_IMM_USE_FAST (use_p, iter, op0)
614 int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p));
616 if (!already_processed_vertex_p (processed, v))
617 rdg_flag_vertex_and_dependent (rdg, v, partition, loops,
618 processed, part_has_writes);
624 /* Flag V from RDG as part of PARTITION, and also flag its loop number
/* NOTE(review): the LOOP declaration, braces and an early return
   (when V is already in PARTITION) are elided from this view.  */
628 rdg_flag_vertex (struct graph *rdg, int v, bitmap partition, bitmap loops,
629 bool *part_has_writes)
/* bitmap_set_bit returns false when V was already flagged.  */
633 if (!bitmap_set_bit (partition, v))
636 loop = loop_containing_stmt (RDG_STMT (rdg, v));
637 bitmap_set_bit (loops, loop->num);
/* A non-recomputable vertex pins this partition as "has writes" and
   is consumed from the global REMAINING_STMTS pool.  */
639 if (rdg_cannot_recompute_vertex_p (rdg, v))
641 *part_has_writes = true;
642 bitmap_clear_bit (remaining_stmts, v);
646 /* Flag in the bitmap PARTITION the vertex V and all its predecessors.
647 Also flag their loop number in LOOPS. */
/* NOTE(review): declarations (I, X) and the recursive call's trailing
   arguments are elided from this view.  */
650 rdg_flag_vertex_and_dependent (struct graph *rdg, int v, bitmap partition,
651 bitmap loops, bitmap processed,
652 bool *part_has_writes)
655 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
658 bitmap_set_bit (processed, v);
659 rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes);
/* DFS restricted to REMAINING_STMTS gathers V's dependence closure.  */
660 graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
661 rdg_flag_vertex (rdg, v, partition, loops, part_has_writes);
663 FOR_EACH_VEC_ELT (int, nodes, i, x)
664 if (!already_processed_vertex_p (processed, x))
665 rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed,
668 VEC_free (int, heap, nodes);
671 /* Initialize CONDS with all the condition statements from the basic
/* NOTE(review): declarations (I, E) and the NULL check on COND are
   elided from this view.  */
675 collect_condition_stmts (struct loop *loop, VEC (gimple, heap) **conds)
679 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
/* The controlling condition of each exit is the last statement of the
   exit edge's source block.  */
681 FOR_EACH_VEC_ELT (edge, exits, i, e)
683 gimple cond = last_stmt (e->src);
686 VEC_safe_push (gimple, heap, *conds, cond);
689 VEC_free (edge, heap, exits);
692 /* Add to PARTITION all the exit condition statements for LOOPS
693 together with all their dependent statements determined from
/* NOTE(review): declarations (I, BI) and some call arguments are
   elided from this view.  */
697 rdg_flag_loop_exits (struct graph *rdg, bitmap loops, bitmap partition,
698 bitmap processed, bool *part_has_writes)
702 VEC (gimple, heap) *conds = VEC_alloc (gimple, heap, 3);
/* Seed CONDS with the exit conditions of every loop in LOOPS.  */
704 EXECUTE_IF_SET_IN_BITMAP (loops, 0, i, bi)
705 collect_condition_stmts (get_loop (i), &conds);
/* Worklist: flagging a condition's dependences may pull statements
   from new loops, whose exit conditions are then queued too.  */
707 while (!VEC_empty (gimple, conds))
709 gimple cond = VEC_pop (gimple, conds);
710 int v = rdg_vertex_for_stmt (rdg, cond);
711 bitmap new_loops = BITMAP_ALLOC (NULL);
713 if (!already_processed_vertex_p (processed, v))
714 rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed,
717 EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
718 if (bitmap_set_bit (loops, i))
719 collect_condition_stmts (get_loop (i), &conds);
721 BITMAP_FREE (new_loops);
724 VEC_free (gimple, heap, conds);
727 /* Returns a bitmap in which all the statements needed for computing
728 the strongly connected component C of the RDG are flagged, also
729 including the loop exit conditions. */
/* NOTE(review): declarations (I, V), the trailing call arguments, the
   free of LOOPS and the return of PARTITION are elided.  Caller owns
   the returned bitmap — TODO confirm against the full source.  */
732 build_rdg_partition_for_component (struct graph *rdg, rdgc c,
733 bool *part_has_writes)
736 bitmap partition = BITMAP_ALLOC (NULL);
737 bitmap loops = BITMAP_ALLOC (NULL);
738 bitmap processed = BITMAP_ALLOC (NULL);
740 FOR_EACH_VEC_ELT (int, c->vertices, i, v)
741 if (!already_processed_vertex_p (processed, v))
742 rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
745 rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
747 BITMAP_FREE (processed);
752 /* Free memory for COMPONENTS. */
/* NOTE(review): declarations (I, X) and the free (x) of each component
   struct are elided from this view.  */
755 free_rdg_components (VEC (rdgc, heap) *components)
760 FOR_EACH_VEC_ELT (rdgc, components, i, x)
762 VEC_free (int, heap, x->vertices);
766 VEC_free (rdgc, heap, components);
769 /* Build the COMPONENTS vector with the strongly connected components
770 of RDG in which the STARTING_VERTICES occur. */
/* NOTE(review): declarations (I, V) and some braces are elided.  */
773 rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices,
774 VEC (rdgc, heap) **components)
777 bitmap saved_components = BITMAP_ALLOC (NULL);
778 int n_components = graphds_scc (rdg, NULL);
779 VEC (int, heap) **all_components = XNEWVEC (VEC (int, heap) *, n_components);
/* Bucket every vertex by the SCC index graphds_scc assigned it.  */
781 for (i = 0; i < n_components; i++)
782 all_components[i] = VEC_alloc (int, heap, 3);
784 for (i = 0; i < rdg->n_vertices; i++)
785 VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);
/* Keep only the SCCs containing a starting vertex, once each.  */
787 FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
789 int c = rdg->vertices[v].component;
791 if (bitmap_set_bit (saved_components, c))
793 rdgc x = XCNEW (struct rdg_component);
795 x->vertices = all_components[c];
797 VEC_safe_push (rdgc, heap, *components, x);
/* Vertex vectors of unsaved SCCs are freed here; saved ones are now
   owned by the rdg_component entries.  */
801 for (i = 0; i < n_components; i++)
802 if (!bitmap_bit_p (saved_components, i))
803 VEC_free (int, heap, all_components[i]);
805 free (all_components);
806 BITMAP_FREE (saved_components);
809 /* Returns true when it is possible to generate a builtin pattern for
810 the PARTITION of RDG. For the moment we detect only the memset
/* NOTE(review): the counter declarations/initializers (NB_READS,
   NB_WRITES, STORES_ZERO) and their increments are elided.  */
814 can_generate_builtin (struct graph *rdg, bitmap partition)
822 EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, bi)
823 if (RDG_MEM_READS_STMT (rdg, i))
825 else if (RDG_MEM_WRITE_STMT (rdg, i))
828 if (stmt_with_adjacent_zero_store_dr_p (RDG_STMT (rdg, i)))
/* memset pattern: exactly one write, which stores zero, no reads.  */
832 return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
835 /* Returns true when PARTITION1 and PARTITION2 have similar memory
/* NOTE(review): declarations (I, J), the PARTITION2 parameter line and
   the returns are elided.  Quadratic scan: every memory access in
   PARTITION1 against every memory access in PARTITION2.  */
839 similar_memory_accesses (struct graph *rdg, bitmap partition1,
843 bitmap_iterator bi, bj;
845 EXECUTE_IF_SET_IN_BITMAP (partition1, 0, i, bi)
846 if (RDG_MEM_WRITE_STMT (rdg, i)
847 || RDG_MEM_READS_STMT (rdg, i))
848 EXECUTE_IF_SET_IN_BITMAP (partition2, 0, j, bj)
849 if (RDG_MEM_WRITE_STMT (rdg, j)
850 || RDG_MEM_READS_STMT (rdg, j))
851 if (rdg_has_similar_memory_accesses (rdg, i, j))
857 /* Fuse all the partitions from PARTITIONS that contain similar memory
858 references, i.e., we're taking care of cache locality. This
859 function does not fuse those partitions that contain patterns that
860 can be code generated with builtins. */
/* NOTE(review): the p1/p2 index declarations and the `p1 != p2` style
   guard on the inner loop are elided from this view.  */
863 fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
864 VEC (bitmap, heap) **partitions)
867 bitmap partition1, partition2;
869 FOR_EACH_VEC_ELT (bitmap, *partitions, p1, partition1)
870 if (!can_generate_builtin (rdg, partition1))
871 FOR_EACH_VEC_ELT (bitmap, *partitions, p2, partition2)
873 && !can_generate_builtin (rdg, partition2)
874 && similar_memory_accesses (rdg, partition1, partition2))
/* Merge PARTITION2 into PARTITION1 and drop it from the vector.  */
876 bitmap_ior_into (partition1, partition2);
877 VEC_ordered_remove (bitmap, *partitions, p2);
882 /* Returns true when STMT will be code generated in a partition of RDG
883 different than PART and that will not be code generated as a
/* NOTE(review): declarations (P, PP, I, BI), the `p != part` style
   guard and the returns are elided from this view.  */
887 stmt_generated_in_another_partition (struct graph *rdg, gimple stmt, int part,
888 VEC (bitmap, heap) *partitions)
895 FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
897 && !can_generate_builtin (rdg, pp))
898 EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
899 if (stmt == RDG_STMT (rdg, i))
905 /* For each partition in PARTITIONS that will be code generated using
906 a builtin, add its scalar computations used after the loop to
/* NOTE(review): the PARTITION parameter line, declarations (P, PP, I,
   BI, F) and the frees of L and PR are elided from this view.  */
910 add_scalar_computations_to_partition (struct graph *rdg,
911 VEC (bitmap, heap) *partitions,
918 bitmap l = BITMAP_ALLOC (NULL);
919 bitmap pr = BITMAP_ALLOC (NULL);
/* Statements escaping the loop from a builtin partition must be
   re-materialized in PARTITION, unless another non-builtin partition
   already generates them.  */
922 FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
923 if (can_generate_builtin (rdg, pp))
924 EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
925 if (stmt_has_scalar_dependences_outside_loop (RDG_STMT (rdg, i))
926 && !stmt_generated_in_another_partition (rdg, RDG_STMT (rdg, i), p,
928 rdg_flag_vertex_and_dependent (rdg, i, partition, l, pr, &f);
930 rdg_flag_loop_exits (rdg, l, partition, pr, &f);
936 /* Aggregate several components into a useful partition that is
937 registered in the PARTITIONS vector. Partitions will be
938 distributed in different loops. */
/* NOTE(review): declarations (I, X, NP), braces and the condition that
   decides when an accumulated partition is "useful" enough to flush
   are elided from this view.  */
941 rdg_build_partitions (struct graph *rdg, VEC (rdgc, heap) *components,
942 VEC (int, heap) **other_stores,
943 VEC (bitmap, heap) **partitions, bitmap processed)
947 bitmap partition = BITMAP_ALLOC (NULL);
949 FOR_EACH_VEC_ELT (rdgc, components, i, x)
952 bool part_has_writes = false;
/* The first vertex stands in for the whole SCC.  */
953 int v = VEC_index (int, x->vertices, 0);
955 if (bitmap_bit_p (processed, v))
958 np = build_rdg_partition_for_component (rdg, x, &part_has_writes);
959 bitmap_ior_into (partition, np);
960 bitmap_ior_into (processed, np);
965 if (dump_file && (dump_flags & TDF_DETAILS))
967 fprintf (dump_file, "ldist useful partition:\n");
968 dump_bitmap (dump_file, partition);
/* Flush the accumulated partition and start a fresh one.  */
971 VEC_safe_push (bitmap, heap, *partitions, partition);
972 partition = BITMAP_ALLOC (NULL);
976 /* Add the nodes from the RDG that were not marked as processed, and
977 that are used outside the current loop. These are scalar
978 computations that are not yet part of previous partitions. */
979 for (i = 0; i < rdg->n_vertices; i++)
980 if (!bitmap_bit_p (processed, i)
981 && rdg_defs_used_in_other_loops_p (rdg, i))
982 VEC_safe_push (int, heap, *other_stores, i);
984 /* If there are still statements left in the OTHER_STORES array,
985 create other components and partitions with these stores and
986 their dependences. */
987 if (VEC_length (int, *other_stores) > 0)
989 VEC (rdgc, heap) *comps = VEC_alloc (rdgc, heap, 3);
990 VEC (int, heap) *foo = VEC_alloc (int, heap, 3);
/* Recurse once over the leftover stores; FOO is a discarded
   other-stores sink for the nested call.  */
992 rdg_build_components (rdg, *other_stores, &comps);
993 rdg_build_partitions (rdg, comps, &foo, partitions, processed);
995 VEC_free (int, heap, foo);
996 free_rdg_components (comps);
999 add_scalar_computations_to_partition (rdg, *partitions, partition);
1001 /* If there is something left in the last partition, save it. */
1002 if (bitmap_count_bits (partition) > 0)
1003 VEC_safe_push (bitmap, heap, *partitions, partition);
1005 BITMAP_FREE (partition);
1007 fuse_partitions_with_similar_memory_accesses (rdg, partitions);
1010 /* Dump to FILE the PARTITIONS. */
/* NOTE(review): declarations (I, PARTITION) are elided.  */
1013 dump_rdg_partitions (FILE *file, VEC (bitmap, heap) *partitions)
1018 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1019 debug_bitmap_file (file, partition);
1022 /* Debug PARTITIONS. */
/* Debugger entry point: dump PARTITIONS to stderr.  */
1023 extern void debug_rdg_partitions (VEC (bitmap, heap) *);
1026 debug_rdg_partitions (VEC (bitmap, heap) *partitions)
1028 dump_rdg_partitions (stderr, partitions);
1031 /* Returns the number of read and write operations in the RDG. */
/* NOTE(review): the counter declaration/increments and the return are
   elided from this view.  */
1034 number_of_rw_in_rdg (struct graph *rdg)
1038 for (i = 0; i < rdg->n_vertices; i++)
1040 if (RDG_MEM_WRITE_STMT (rdg, i))
1043 if (RDG_MEM_READS_STMT (rdg, i))
1050 /* Returns the number of read and write operations in a PARTITION of
/* NOTE(review): the counter declaration/increments and the return are
   elided from this view.  */
1054 number_of_rw_in_partition (struct graph *rdg, bitmap partition)
1060 EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
1062 if (RDG_MEM_WRITE_STMT (rdg, i))
1065 if (RDG_MEM_READS_STMT (rdg, i))
1072 /* Returns true when one of the PARTITIONS contains all the read or
1073 write operations of RDG. */
/* Used to reject distributions that leave one partition doing all the
   memory traffic (no benefit in splitting).  NOTE(review): declarations
   and returns are elided from this view.  */
1076 partition_contains_all_rw (struct graph *rdg, VEC (bitmap, heap) *partitions)
1080 int nrw = number_of_rw_in_rdg (rdg);
1082 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1083 if (nrw == number_of_rw_in_partition (rdg, partition))
1089 /* Generate code from STARTING_VERTICES in RDG. Returns the number of
1090 distributed loops. */
/* NOTE(review): extraction gaps — declarations (I, J, V, NBP), braces,
   the goto-style exit labels and the final return are missing from
   this view.  */
1093 ldist_gen (struct loop *loop, struct graph *rdg,
1094 VEC (int, heap) *starting_vertices)
1097 VEC (rdgc, heap) *components = VEC_alloc (rdgc, heap, 3);
1098 VEC (bitmap, heap) *partitions = VEC_alloc (bitmap, heap, 3);
1099 VEC (int, heap) *other_stores = VEC_alloc (int, heap, 3);
1100 bitmap partition, processed = BITMAP_ALLOC (NULL);
/* File-scope state used by the flagging helpers below.  */
1102 remaining_stmts = BITMAP_ALLOC (NULL);
1103 upstream_mem_writes = BITMAP_ALLOC (NULL);
1105 for (i = 0; i < rdg->n_vertices; i++)
1107 bitmap_set_bit (remaining_stmts, i);
1109 /* Save in OTHER_STORES all the memory writes that are not in
1110 STARTING_VERTICES. */
1111 if (RDG_MEM_WRITE_STMT (rdg, i))
1117 FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
1125 VEC_safe_push (int, heap, other_stores, i);
1129 mark_nodes_having_upstream_mem_writes (rdg);
1130 rdg_build_components (rdg, starting_vertices, &components);
1131 rdg_build_partitions (rdg, components, &other_stores, &partitions,
1133 BITMAP_FREE (processed);
1134 nbp = VEC_length (bitmap, partitions);
/* Give up when nothing was split, or splitting gains nothing.  */
1137 || partition_contains_all_rw (rdg, partitions))
1140 if (dump_file && (dump_flags & TDF_DETAILS))
1141 dump_rdg_partitions (dump_file, partitions);
/* Code-generate each partition; COPY_P for all but the last.  */
1143 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1144 if (!generate_code_for_partition (loop, partition, i < nbp - 1))
1147 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1148 update_ssa (TODO_update_ssa_only_virtuals | TODO_update_ssa);
/* Cleanup of all per-invocation allocations.  */
1152 BITMAP_FREE (remaining_stmts);
1153 BITMAP_FREE (upstream_mem_writes);
1155 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1156 BITMAP_FREE (partition);
1158 VEC_free (int, heap, other_stores);
1159 VEC_free (bitmap, heap, partitions);
1160 free_rdg_components (components);
1164 /* Distributes the code from LOOP in such a way that producer
1165 statements are placed before consumer statements. When STMTS is
1166 NULL, performs the maximal distribution, if STMTS is not NULL,
1167 tries to separate only these statements from the LOOP's body.
1168 Returns the number of distributed loops. */
/* NOTE(review): declarations (RES, RDG, I, S), braces, the NULL-RDG
   test, the fprintf calls' first arguments, the free_rdg call and the
   final return of RES are elided from this view.  */
1171 distribute_loop (struct loop *loop, VEC (gimple, heap) *stmts)
1177 VEC (int, heap) *vertices;
1178 VEC (ddr_p, heap) *dependence_relations;
1179 VEC (data_reference_p, heap) *datarefs;
1180 VEC (loop_p, heap) *loop_nest;
/* Only simple (two-block) loops are handled.  */
1182 if (loop->num_nodes > 2)
1184 if (dump_file && (dump_flags & TDF_DETAILS))
1186 "FIXME: Loop %d not distributed: it has more than two basic blocks.\n",
1192 datarefs = VEC_alloc (data_reference_p, heap, 10);
1193 dependence_relations = VEC_alloc (ddr_p, heap, 100);
1194 loop_nest = VEC_alloc (loop_p, heap, 3);
1195 rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);
/* RDG construction can fail (e.g. unanalyzable dependences).  */
1199 if (dump_file && (dump_flags & TDF_DETAILS))
1201 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1204 free_dependence_relations (dependence_relations);
1205 free_data_refs (datarefs);
1206 VEC_free (loop_p, heap, loop_nest);
1210 vertices = VEC_alloc (int, heap, 3);
1212 if (dump_file && (dump_flags & TDF_DETAILS))
1213 dump_rdg (dump_file, rdg);
/* Translate the requested statements into RDG vertex indices.  */
1215 FOR_EACH_VEC_ELT (gimple, stmts, i, s)
1217 int v = rdg_vertex_for_stmt (rdg, s);
1221 VEC_safe_push (int, heap, vertices, v);
1223 if (dump_file && (dump_flags & TDF_DETAILS))
1225 "ldist asked to generate code for vertex %d\n", v);
1229 res = ldist_gen (loop, rdg, vertices);
1230 VEC_free (int, heap, vertices);
1232 free_dependence_relations (dependence_relations);
1233 free_data_refs (datarefs);
1234 VEC_free (loop_p, heap, loop_nest);
1238 /* Distribute all loops in the current function. */
/* Pass entry point.  NOTE(review): the LI/LOOP declarations, the
   `continue` statements after the guards, the checking-only guard
   around verify_loop_structure and the return are elided.  */
1241 tree_loop_distribution (void)
1245 int nb_generated_loops = 0;
1247 FOR_EACH_LOOP (li, loop, 0)
1249 VEC (gimple, heap) *work_list = NULL;
1250 int num = loop->num;
1252 /* If the loop doesn't have a single exit we will fail anyway,
1253 so do that early. */
1254 if (!single_exit (loop))
1257 /* If both flag_tree_loop_distribute_patterns and
1258 flag_tree_loop_distribution are set, then only
1259 distribute_patterns is executed. */
1260 if (flag_tree_loop_distribute_patterns)
1262 /* With the following working list, we're asking
1263 distribute_loop to separate from the rest of the loop the
1264 stores of the form "A[i] = 0". */
1265 stores_zero_from_loop (loop, &work_list);
1267 /* Do nothing if there are no patterns to be distributed. */
1268 if (VEC_length (gimple, work_list) > 0)
1269 nb_generated_loops = distribute_loop (loop, work_list);
1271 else if (flag_tree_loop_distribution)
1273 /* With the following working list, we're asking
1274 distribute_loop to separate the stores of the loop: when
1275 dependences allow, it will end on having one store per
1277 stores_from_loop (loop, &work_list);
1279 /* A simple heuristic for cache locality is to not split
1280 stores to the same array. Without this call, an unrolled
1281 loop would be split into as many loops as unroll factor,
1282 each loop storing in the same array. */
1283 remove_similar_memory_refs (&work_list);
1285 nb_generated_loops = distribute_loop (loop, work_list);
1288 if (dump_file && (dump_flags & TDF_DETAILS))
1290 if (nb_generated_loops > 1)
1291 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
1292 num, nb_generated_loops);
1294 fprintf (dump_file, "Loop %d is the same.\n", num);
1297 verify_loop_structure ();
1299 VEC_free (gimple, heap, work_list);
/* Pass gate: run when either distribution flag is enabled.  */
1306 gate_tree_loop_distribution (void)
1308 return flag_tree_loop_distribution
1309 || flag_tree_loop_distribute_patterns;
1312 struct gimple_opt_pass pass_loop_distribution =
1317 gate_tree_loop_distribution, /* gate */
1318 tree_loop_distribution, /* execute */
1321 0, /* static_pass_number */
1322 TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
1323 PROP_cfg | PROP_ssa, /* properties_required */
1324 0, /* properties_provided */
1325 0, /* properties_destroyed */
1326 0, /* todo_flags_start */
1327 TODO_dump_func /* todo_flags_finish */