1 /* Function splitting pass
3 Free Software Foundation, Inc.
4 Contributed by Jan Hubicka <jh@suse.cz>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* The purpose of this pass is to split function bodies to improve
23 inlining. I.e. for function of the form:
48 When func becomes inlinable and when cheap_test is often true, inlining func,
49 but not func.part leads to performance improvement similar to inlining the
50 original func while the code size growth is smaller.
52 The pass is organized in three stages:
53 1) Collect local info about basic block into BB_INFO structure and
54 compute function body estimated size and time.
55 2) Via DFS walk find all possible basic blocks where we can split
57 3) If split point is found, split at the specified BB by creating a clone
58 and updating function to call it.
60 The decisions what functions to split are in execute_split_functions
63 There are several possible future improvements for this pass including:
65 1) Splitting to break up large functions
66 2) Splitting to reduce stack frame usage
67 3) Allow split part of function to use values computed in the header part.
68 The values need to be passed to the split function, perhaps via the same
69 interface as for nested functions or as an argument.
70 4) Support for simple rematerialization. I.e. when split part use
71 value computed in header from function parameter in very cheap way, we
72 can just recompute it.
73 5) Support splitting of nested functions.
74 6) Support non-SSA arguments.
75 7) There is nothing preventing us from producing multiple parts of single function
76 when needed or splitting also the parts. */
80 #include "coretypes.h"
85 #include "tree-flow.h"
86 #include "tree-pass.h"
89 #include "diagnostic.h"
90 #include "tree-dump.h"
91 #include "tree-inline.h"
94 #include "gimple-pretty-print.h"
96 /* Per basic block info. */
/* NOTE(review): the bb_info struct body is elided in this chunk; later code
   accesses its ->time and ->size members, so it holds the estimated execution
   time and code size of each basic block.  */
104 DEF_VEC_ALLOC_O(bb_info,heap);
/* Vector of per-BB info, indexed by basic block index; grown and filled in
   execute_split_functions and read during the DFS in find_split_points.  */
106 static VEC(bb_info, heap) *bb_info_vec;
108 /* Description of split point. */
112 /* Size of the partitions. */
/* Estimated time and size of the header (the part that stays in the original
   function) and of the part being split away.  */
113 unsigned int header_time, header_size, split_time, split_size;
115 /* SSA names that need to be passed into split function. */
116 bitmap ssa_names_to_pass;
118 /* Basic block where we split (that will become entry point of new function). */
119 basic_block entry_bb;
121 /* Basic blocks we are splitting away. */
125 /* Best split point found. */
/* Global candidate updated by consider_split; its split_bbs member stays NULL
   until a first split point is accepted (see execute_split_functions).  */
127 struct split_point best_split_point;
129 /* Callback for walk_stmt_load_store_addr_ops. If T is non-ssa automatic
130 variable, check whether it is present in the bitmap passed via DATA. */
133 test_nonssa_use (gimple stmt ATTRIBUTE_UNUSED, tree t,
134 void *data ATTRIBUTE_UNUSED)
/* Look at the base object of the reference, not at component/array refs.  */
136 t = get_base_address (t);
/* Only non-register automatic variables, the RESULT_DECL and PARM_DECLs of
   the current function matter; SSA registers are tracked separately.  */
138 if (t && !is_gimple_reg (t)
139 && ((TREE_CODE (t) == VAR_DECL
140 && auto_var_in_fn_p (t, current_function_decl))
141 || (TREE_CODE (t) == RESULT_DECL)
142 || (TREE_CODE (t) == PARM_DECL)))
143 return bitmap_bit_p ((bitmap)data, DECL_UID (t));
147 /* Dump split point CURRENT. */
/* Prints the header/split time and size estimates followed by the bitmap of
   split-away BBs and the bitmap of SSA names to pass, to FILE.  */
150 dump_split_point (FILE * file, struct split_point *current)
153 "Split point at BB %i header time:%i header size: %i"
154 " split time: %i split size: %i\n bbs: ",
155 current->entry_bb->index, current->header_time,
156 current->header_size, current->split_time, current->split_size);
157 dump_bitmap (file, current->split_bbs);
158 fprintf (file, " SSA names to pass: ");
159 dump_bitmap (file, current->ssa_names_to_pass);
162 /* We found a split_point CURRENT. NON_SSA_VARS is bitmap of all non ssa
163 variables used and RETURN_BB is return basic block.
164 See if we can split function here.
   On acceptance, records the candidate into the global best_split_point when
   it beats the previous best (lower entry frequency, then larger split).  */
167 consider_split (struct split_point *current, bitmap non_ssa_vars,
168 basic_block return_bb)
171 unsigned int num_args = 0;
172 unsigned int call_overhead;
175 gimple_stmt_iterator bsi;
177 int incomming_freq = 0;
179 if (dump_file && (dump_flags & TDF_DETAILS))
180 dump_split_point (dump_file, current);
/* Sum frequencies of edges entering the split region from the header.  */
182 FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
183 if (!bitmap_bit_p (current->split_bbs, e->src->index))
184 incomming_freq += EDGE_FREQUENCY (e);
186 /* Do not split when we would end up calling function anyway. */
188 >= (ENTRY_BLOCK_PTR->frequency
189 * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100))
191 if (dump_file && (dump_flags & TDF_DETAILS))
193 " Refused: incomming frequency is too large.\n");
/* Splitting with an empty header gains nothing over inlining.  */
197 if (!current->header_size)
199 if (dump_file && (dump_flags & TDF_DETAILS))
200 fprintf (dump_file, " Refused: header empty\n");
205 /* Verify that PHI args on entry are either virtual or all their operands
206 incoming from header are the same. */
207 for (bsi = gsi_start_phis (current->entry_bb); !gsi_end_p (bsi); gsi_next (&bsi))
209 gimple stmt = gsi_stmt (bsi);
212 if (!is_gimple_reg (gimple_phi_result (stmt)))
214 for (i = 0; i < gimple_phi_num_args (stmt); i++)
216 edge e = gimple_phi_arg_edge (stmt, i);
217 if (!bitmap_bit_p (current->split_bbs, e->src->index))
219 tree edge_val = gimple_phi_arg_def (stmt, i);
220 if (val && edge_val != val)
222 if (dump_file && (dump_flags & TDF_DETAILS))
224 " Refused: entry BB has PHI with multiple variants\n");
233 /* See what argument we will pass to the split function and compute
   the call overhead of passing them (plus the return-value move cost).  */
235 call_overhead = eni_size_weights.call_cost;
236 for (parm = DECL_ARGUMENTS (current_function_decl); parm;
237 parm = TREE_CHAIN (parm))
239 if (!is_gimple_reg (parm))
241 if (bitmap_bit_p (non_ssa_vars, DECL_UID (parm)))
243 if (dump_file && (dump_flags & TDF_DETAILS))
245 " Refused: need to pass non-ssa param values\n");
249 else if (gimple_default_def (cfun, parm)
250 && bitmap_bit_p (current->ssa_names_to_pass,
251 SSA_NAME_VERSION (gimple_default_def
254 if (!VOID_TYPE_P (TREE_TYPE (parm)))
255 call_overhead += estimate_move_cost (TREE_TYPE (parm));
259 if (!VOID_TYPE_P (TREE_TYPE (current_function_decl)))
260 call_overhead += estimate_move_cost (TREE_TYPE (current_function_decl));
/* A split that is cheaper to execute than its call overhead is a loss.  */
262 if (current->split_size <= call_overhead)
264 if (dump_file && (dump_flags & TDF_DETAILS))
266 " Refused: split size is smaller than call overhead\n");
/* The remaining header must still fit under the inline-candidate limits,
   otherwise the split will not help inlining at all.  */
269 if (current->header_size + call_overhead
270 >= (unsigned int)(DECL_DECLARED_INLINE_P (current_function_decl)
271 ? MAX_INLINE_INSNS_SINGLE
272 : MAX_INLINE_INSNS_AUTO))
274 if (dump_file && (dump_flags & TDF_DETAILS))
276 " Refused: header size is too large for inline candidate\n");
280 /* FIXME: we currently can pass only SSA function parameters to the split
281 arguments. Once parm_adjustment infrastructure is supported by cloning,
282 we can pass more than that. */
283 if (num_args != bitmap_count_bits (current->ssa_names_to_pass))
286 if (dump_file && (dump_flags & TDF_DETAILS))
288 " Refused: need to pass non-param values\n");
292 /* When there are non-ssa vars used in the split region, see if they
293 are used in the header region. If so, reject the split.
294 FIXME: we can use nested function support to access both. */
295 if (!bitmap_empty_p (non_ssa_vars))
300 gimple_stmt_iterator bsi;
301 if (!bitmap_bit_p (current->split_bbs, bb->index))
303 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
305 if (is_gimple_debug (gsi_stmt (bsi)))
307 if (walk_stmt_load_store_addr_ops
308 (gsi_stmt (bsi), non_ssa_vars, test_nonssa_use,
309 test_nonssa_use, test_nonssa_use))
311 if (dump_file && (dump_flags & TDF_DETAILS))
313 " Refused: split part has non-ssa uses\n");
317 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
319 if (walk_stmt_load_store_addr_ops
320 (gsi_stmt (bsi), non_ssa_vars, test_nonssa_use,
321 test_nonssa_use, test_nonssa_use))
323 if (dump_file && (dump_flags & TDF_DETAILS))
325 " Refused: split part has non-ssa uses\n");
/* Also check PHI arguments flowing from header BBs into the return BB.  */
329 FOR_EACH_EDGE (e, ei, bb->succs)
331 if (e->dest != return_bb)
333 for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi);
336 gimple stmt = gsi_stmt (bsi);
337 tree op = gimple_phi_arg_def (stmt, e->dest_idx);
339 if (!is_gimple_reg (gimple_phi_result (stmt)))
341 if (TREE_CODE (op) != SSA_NAME
342 && test_nonssa_use (stmt, op, non_ssa_vars))
344 if (dump_file && (dump_flags & TDF_DETAILS))
346 " Refused: split part has non-ssa uses\n");
354 if (dump_file && (dump_flags & TDF_DETAILS))
355 fprintf (dump_file, " Accepted!\n");
357 /* At the moment choose split point with lowest frequency and that leaves
358 out smallest size of header.
359 In future we might re-consider this heuristic. */
360 if (!best_split_point.split_bbs
361 || best_split_point.entry_bb->frequency > current->entry_bb->frequency
362 || (best_split_point.entry_bb->frequency == current->entry_bb->frequency
363 && best_split_point.split_size < current->split_size))
366 if (dump_file && (dump_flags & TDF_DETAILS))
367 fprintf (dump_file, " New best split point!\n");
368 if (best_split_point.ssa_names_to_pass)
370 BITMAP_FREE (best_split_point.ssa_names_to_pass);
371 BITMAP_FREE (best_split_point.split_bbs);
/* Deep-copy the bitmaps: CURRENT's bitmaps are owned by the caller and
   freed/reused during the rest of the walk.  */
373 best_split_point = *current;
374 best_split_point.ssa_names_to_pass = BITMAP_ALLOC (NULL);
375 bitmap_copy (best_split_point.ssa_names_to_pass,
376 current->ssa_names_to_pass);
377 best_split_point.split_bbs = BITMAP_ALLOC (NULL);
378 bitmap_copy (best_split_point.split_bbs, current->split_bbs);
382 /* Return basic block containing RETURN statement, or EXIT_BLOCK_PTR if none
384 When there are multiple RETURN statements, choose one with return value,
385 since that one is more likely shared by multiple code paths.
386 TODO: We might support multiple return blocks. */
389 find_return_bb (void)
393 basic_block return_bb = EXIT_BLOCK_PTR;
/* Only handle the case of a single predecessor of the exit block; a shared
   return BB must contain nothing but labels, debug stmts and the return.  */
395 if (EDGE_COUNT (EXIT_BLOCK_PTR->preds) == 1)
396 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
398 gimple_stmt_iterator bsi;
399 bool found_return = false;
400 tree retval = NULL_TREE;
402 for (bsi = gsi_start_bb (e->src); !gsi_end_p (bsi); gsi_next (&bsi))
403 if (gimple_code (gsi_stmt (bsi)) != GIMPLE_RETURN
404 && gimple_code (gsi_stmt (bsi)) != GIMPLE_LABEL
405 && !is_gimple_debug (gsi_stmt (bsi)))
407 else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN)
410 retval = gimple_return_retval (gsi_stmt (bsi));
/* If we scanned the whole BB and saw a return, this BB qualifies.  */
412 if (gsi_end_p (bsi) && found_return)
423 /* Callback for walk_stmt_load_store_addr_ops. If T is non-ssa automatic
424 variable, mark it as used in bitmap passed via DATA.
425 Return true when access to T prevents splitting the function. */
428 mark_nonssa_use (gimple stmt ATTRIBUTE_UNUSED, tree t,
429 void *data ATTRIBUTE_UNUSED)
431 t = get_base_address (t);
/* SSA registers are tracked through the SSA operand walk instead.  */
433 if (!t || is_gimple_reg (t))
436 /* At present we can't pass non-SSA arguments to split function.
437 FIXME: this can be relaxed by passing references to arguments. */
438 if (TREE_CODE (t) == PARM_DECL)
440 if (dump_file && (dump_flags & TDF_DETAILS))
441 fprintf (dump_file, "Can not split use of non-ssa function parameter.\n");
/* Remember automatic variables and the result decl; consider_split later
   rejects splits where these are used on both sides.  */
445 if ((TREE_CODE (t) == VAR_DECL && auto_var_in_fn_p (t, current_function_decl))
446 || (TREE_CODE (t) == RESULT_DECL))
447 bitmap_set_bit ((bitmap)data, DECL_UID (t));
451 /* Compute local properties of basic block BB we collect when looking for
452 split points. We look for ssa defs and store them in SET_SSA_NAMES,
453 for ssa uses and store them in USED_SSA_NAMES and for any non-SSA automatic
454 vars stored in NON_SSA_VARS.
456 When BB has edge to RETURN_BB, collect uses in RETURN_BB too.
458 Return false when BB contains something that prevents it from being put into
   the split part (external RESX, EH dispatch, certain builtins, or uses of
   non-SSA values that cannot be passed across the split).  */
462 visit_bb (basic_block bb, basic_block return_bb,
463 bitmap set_ssa_names, bitmap used_ssa_names,
466 gimple_stmt_iterator bsi;
469 bool can_split = true;
/* Scan all real statements of BB.  */
471 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
473 gimple stmt = gsi_stmt (bsi);
478 if (is_gimple_debug (stmt))
481 /* FIXME: We can split regions containing EH. We can not however
482 split RESX, EH_DISPATCH and EH_POINTER referring to same region
483 into different partitions. This would require tracking of
484 EH regions and checking in consider_split_point if they
485 are not used elsewhere. */
486 if (gimple_code (stmt) == GIMPLE_RESX
487 && stmt_can_throw_external (stmt))
489 if (dump_file && (dump_flags & TDF_DETAILS))
490 fprintf (dump_file, "Can not split external resx.\n");
493 if (gimple_code (stmt) == GIMPLE_EH_DISPATCH)
495 if (dump_file && (dump_flags & TDF_DETAILS))
496 fprintf (dump_file, "Can not split eh dispatch.\n");
500 /* Check builtins that prevent splitting. */
501 if (gimple_code (stmt) == GIMPLE_CALL
502 && (decl = gimple_call_fndecl (stmt)) != NULL_TREE
503 && DECL_BUILT_IN (decl)
504 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
505 switch (DECL_FUNCTION_CODE (decl))
507 /* FIXME: once we will allow passing non-parm values to split part,
508 we need to be sure to handle correct builtin_stack_save and
509 builtin_stack_restore. At the moment we are safe; there is no
510 way to store builtin_stack_save result in non-SSA variable
511 since all calls to those are compiler generated. */
513 case BUILT_IN_VA_START:
514 if (dump_file && (dump_flags & TDF_DETAILS))
515 fprintf (dump_file, "Can not split builtin_apply and va_start.\n");
518 case BUILT_IN_EH_POINTER:
519 if (dump_file && (dump_flags & TDF_DETAILS))
520 fprintf (dump_file, "Can not split builtin_eh_pointer.\n");
/* Collect SSA defs/uses of the statement and walk its memory operands.  */
527 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
528 bitmap_set_bit (set_ssa_names, SSA_NAME_VERSION (op));
529 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
530 bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
531 can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
/* Same bookkeeping for the PHI nodes of BB.  */
536 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
538 gimple stmt = gsi_stmt (bsi);
541 if (is_gimple_debug (stmt))
543 if (!is_gimple_reg (gimple_phi_result (stmt)))
545 bitmap_set_bit (set_ssa_names,
546 SSA_NAME_VERSION (gimple_phi_result (stmt)));
547 for (i = 0; i < gimple_phi_num_args (stmt); i++)
549 tree op = gimple_phi_arg_def (stmt, i);
550 if (TREE_CODE (op) == SSA_NAME)
551 bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
553 can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
558 /* Record also uses coming from PHI operand in return BB. */
559 FOR_EACH_EDGE (e, ei, bb->succs)
560 if (e->dest == return_bb)
562 bool found_phi = false;
563 for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi); gsi_next (&bsi))
565 gimple stmt = gsi_stmt (bsi);
566 tree op = gimple_phi_arg_def (stmt, e->dest_idx);
568 if (is_gimple_debug (stmt))
570 if (!is_gimple_reg (gimple_phi_result (stmt)))
573 if (TREE_CODE (op) == SSA_NAME)
574 bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
576 can_split &= !mark_nonssa_use (stmt, op, non_ssa_vars);
/* The return statement of RETURN_BB itself also uses values from BB's
   side of the split; account for its operands too.  */
578 if (!gsi_end_p (gsi_last_bb (return_bb)))
581 gimple stmt = gsi_stmt (gsi_last_bb (return_bb));
584 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
585 bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
586 can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
595 /* Stack entry for recursive DFS walk in find_split_point. */
599 /* Basic block we are examining. */
602 /* SSA names set and used by the BB and all BBs reachable
603 from it via DFS walk. */
604 bitmap set_ssa_names, used_ssa_names;
607 /* All BBS visited from this BB via DFS walk. */
610 /* Last examined edge in DFS walk. Since we walk unoriented graph,
611 the value is up to sum of incoming and outgoing edges of BB. */
612 unsigned int edge_num;
614 /* Stack entry index of earliest BB reachable from current BB
615 or any BB visited later in DFS walk. */
618 /* Overall time and size of all BBs reached from this BB in DFS walk. */
619 int overall_time, overall_size;
621 /* When false we can not split on this BB. */
/* Heap-allocated DFS stack of these entries (see find_split_points).  */
624 DEF_VEC_O(stack_entry);
625 DEF_VEC_ALLOC_O(stack_entry,heap);
628 /* Find all articulations and call consider_split on them.
629 OVERALL_TIME and OVERALL_SIZE is time and size of the function.
631 We perform basic algorithm for finding an articulation in a graph
632 created from CFG by considering it to be an unoriented graph.
634 The articulation is discovered via DFS walk. We collect earliest
635 basic block on stack that is reachable via backward edge. Articulation
636 is any basic block such that there is no backward edge bypassing it.
637 To reduce stack usage we maintain heap allocated stack in STACK vector.
638 AUX pointer of BB is set to index it appears in the stack or -1 once
639 it is visited and popped off the stack.
641 The algorithm finds articulation after visiting the whole component
642 reachable by it. This makes it convenient to collect information about
643 the component used by consider_split. */
646 find_split_points (int overall_time, int overall_size)
649 VEC(stack_entry, heap) *stack = NULL;
651 basic_block return_bb = find_return_bb ();
652 struct split_point current;
654 current.header_time = overall_time;
655 current.header_size = overall_size;
656 current.split_time = 0;
657 current.split_size = 0;
658 current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
/* Seed the DFS with the entry block; it carries no BB info of its own.  */
660 first.bb = ENTRY_BLOCK_PTR;
662 first.overall_time = 0;
663 first.overall_size = 0;
664 first.earliest = INT_MAX;
665 first.set_ssa_names = 0;
666 first.used_ssa_names = 0;
667 first.bbs_visited = 0;
668 VEC_safe_push (stack_entry, heap, stack, &first);
669 ENTRY_BLOCK_PTR->aux = (void *)(intptr_t)-1;
671 while (!VEC_empty (stack_entry, stack))
673 stack_entry *entry = VEC_last (stack_entry, stack);
675 /* We are walking an acyclic graph, so edge_num counts
676 succ and pred edges together. However when considering
677 articulation, we want to have processed everything reachable
678 from articulation but nothing that reaches into it. */
679 if (entry->edge_num == EDGE_COUNT (entry->bb->succs)
680 && entry->bb != ENTRY_BLOCK_PTR)
682 int pos = VEC_length (stack_entry, stack);
683 entry->can_split &= visit_bb (entry->bb, return_bb,
684 entry->set_ssa_names,
685 entry->used_ssa_names,
686 entry->non_ssa_vars);
687 if (pos <= entry->earliest && !entry->can_split
688 && dump_file && (dump_flags & TDF_DETAILS))
690 "found articulation at bb %i but can not split\n",
/* Articulation found: everything below this BB on the stack forms the
   candidate split part; hand it to consider_split.  */
692 if (pos <= entry->earliest && entry->can_split)
694 if (dump_file && (dump_flags & TDF_DETAILS))
695 fprintf (dump_file, "found articulation at bb %i\n",
697 current.entry_bb = entry->bb;
698 current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
699 bitmap_and_compl (current.ssa_names_to_pass,
700 entry->used_ssa_names, entry->set_ssa_names);
701 current.header_time = overall_time - entry->overall_time;
702 current.header_size = overall_size - entry->overall_size;
703 current.split_time = entry->overall_time;
704 current.split_size = entry->overall_size;
705 current.split_bbs = entry->bbs_visited;
706 consider_split (&current, entry->non_ssa_vars, return_bb);
707 BITMAP_FREE (current.ssa_names_to_pass);
710 /* Do actual DFS walk. */
712 < (EDGE_COUNT (entry->bb->succs)
713 + EDGE_COUNT (entry->bb->preds)))
717 if (entry->edge_num < EDGE_COUNT (entry->bb->succs))
719 e = EDGE_SUCC (entry->bb, entry->edge_num);
724 e = EDGE_PRED (entry->bb, entry->edge_num
725 - EDGE_COUNT (entry->bb->succs));
731 /* New BB to visit, push it to the stack. */
732 if (dest != return_bb && dest != EXIT_BLOCK_PTR
735 stack_entry new_entry;
738 new_entry.edge_num = 0;
739 new_entry.overall_time
740 = VEC_index (bb_info, bb_info_vec, dest->index)->time;
741 new_entry.overall_size
742 = VEC_index (bb_info, bb_info_vec, dest->index)->size;
743 new_entry.earliest = INT_MAX;
744 new_entry.set_ssa_names = BITMAP_ALLOC (NULL);
745 new_entry.used_ssa_names = BITMAP_ALLOC (NULL);
746 new_entry.bbs_visited = BITMAP_ALLOC (NULL);
747 new_entry.non_ssa_vars = BITMAP_ALLOC (NULL);
748 new_entry.can_split = true;
749 bitmap_set_bit (new_entry.bbs_visited, dest->index);
750 VEC_safe_push (stack_entry, heap, stack, &new_entry);
751 dest->aux = (void *)(intptr_t)VEC_length (stack_entry, stack);
753 /* Back edge found, record the earliest point. */
754 else if ((intptr_t)dest->aux > 0
755 && (intptr_t)dest->aux < entry->earliest)
756 entry->earliest = (intptr_t)dest->aux;
758 /* We are done with examining the edges. Pop off the value from stack
759 and merge stuff we accumulate during the walk. */
760 else if (entry->bb != ENTRY_BLOCK_PTR)
762 stack_entry *prev = VEC_index (stack_entry, stack,
763 VEC_length (stack_entry, stack) - 2);
765 entry->bb->aux = (void *)(intptr_t)-1;
766 prev->can_split &= entry->can_split;
767 if (prev->set_ssa_names)
769 bitmap_ior_into (prev->set_ssa_names, entry->set_ssa_names);
770 bitmap_ior_into (prev->used_ssa_names, entry->used_ssa_names);
771 bitmap_ior_into (prev->bbs_visited, entry->bbs_visited);
772 bitmap_ior_into (prev->non_ssa_vars, entry->non_ssa_vars);
774 if (prev->earliest > entry->earliest)
775 prev->earliest = entry->earliest;
776 prev->overall_time += entry->overall_time;
777 prev->overall_size += entry->overall_size;
778 BITMAP_FREE (entry->set_ssa_names);
779 BITMAP_FREE (entry->used_ssa_names);
780 BITMAP_FREE (entry->bbs_visited);
781 BITMAP_FREE (entry->non_ssa_vars);
782 VEC_pop (stack_entry, stack);
785 VEC_pop (stack_entry, stack);
787 ENTRY_BLOCK_PTR->aux = NULL;
790 BITMAP_FREE (current.ssa_names_to_pass);
793 /* Split function at SPLIT_POINT.
   Creates a clone ("func_part") containing the split-away BBs via
   cgraph_function_versioning, then replaces the split region in the
   original function with a call to the clone, wiring up arguments and
   the return value.  */
796 split_function (struct split_point *split_point)
798 VEC (tree, heap) *args_to_pass = NULL;
799 bitmap args_to_skip = BITMAP_ALLOC (NULL);
802 struct cgraph_node *node;
803 basic_block return_bb = find_return_bb ();
805 gimple_stmt_iterator gsi;
809 tree retval = NULL, real_retval = NULL;
810 bool split_part_return_p = false;
811 gimple last_stmt = NULL;
815 fprintf (dump_file, "\n\nSplitting function at:\n");
816 dump_split_point (dump_file, split_point);
819 /* Collect the parameters of new function and args_to_skip bitmap. */
820 for (parm = DECL_ARGUMENTS (current_function_decl);
821 parm; parm = TREE_CHAIN (parm), num++)
822 if (!is_gimple_reg (parm)
823 || !gimple_default_def (cfun, parm)
824 || !bitmap_bit_p (split_point->ssa_names_to_pass,
825 SSA_NAME_VERSION (gimple_default_def (cfun, parm))))
826 bitmap_set_bit (args_to_skip, num);
828 VEC_safe_push (tree, heap, args_to_pass, gimple_default_def (cfun, parm));
830 /* See if the split function will return. */
831 FOR_EACH_EDGE (e, ei, return_bb->preds)
832 if (bitmap_bit_p (split_point->split_bbs, e->src->index))
835 split_part_return_p = true;
837 /* If we return, we will need the return block. */
838 if (return_bb != EXIT_BLOCK_PTR && split_part_return_p)
839 bitmap_set_bit (split_point->split_bbs, return_bb->index);
841 /* Now create the actual clone. */
842 rebuild_cgraph_edges ();
843 node = cgraph_function_versioning (cgraph_node (current_function_decl),
846 split_point->split_bbs,
847 split_point->entry_bb, "_part");
848 /* For usual cloning it is enough to clear builtin only when signature
849 changes. For partial inlining we however can not expect the part
850 of builtin implementation to have same semantic as the whole. */
851 if (DECL_BUILT_IN (node->decl))
853 DECL_BUILT_IN_CLASS (node->decl) = NOT_BUILT_IN;
854 DECL_FUNCTION_CODE (node->decl) = (enum built_in_function) 0;
856 cgraph_node_remove_callees (cgraph_node (current_function_decl));
/* A never-returning split part lets callers treat the call as noreturn.  */
857 if (!split_part_return_p)
858 TREE_THIS_VOLATILE (node->decl) = 1;
860 dump_function_to_file (node->decl, dump_file, dump_flags);
862 /* Create the basic block we place call into. It is the entry basic block
863 split after last label. */
864 call_bb = split_point->entry_bb;
865 for (gsi = gsi_start_bb (call_bb); !gsi_end_p (gsi);)
866 if (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL)
868 last_stmt = gsi_stmt (gsi);
873 e = split_block (split_point->entry_bb, last_stmt);
876 /* Produce the call statement. */
877 gsi = gsi_last_bb (call_bb);
878 call = gimple_build_call_vec (node->decl, args_to_pass);
879 gimple_set_block (call, DECL_INITIAL (current_function_decl));
881 /* Update return value. This is bit tricky. When we do not return,
882 do nothing. When we return we might need to update return_bb
883 or produce a new return statement. */
884 if (!split_part_return_p)
885 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
/* The split part returns: route control from the call BB to RETURN_BB.  */
888 e = make_edge (call_bb, return_bb,
889 return_bb == EXIT_BLOCK_PTR ? 0 : EDGE_FALLTHRU);
890 e->count = call_bb->count;
891 e->probability = REG_BR_PROB_BASE;
892 if (return_bb != EXIT_BLOCK_PTR)
894 gimple return_stmt = gsi_stmt (gsi_last_bb (return_bb));
895 gcc_assert (gimple_code (return_stmt) == GIMPLE_RETURN);
897 if ((real_retval = retval = gimple_return_retval (return_stmt))
898 && !is_gimple_min_invariant (retval)
899 && (TREE_CODE (retval) != SSA_NAME
900 || !SSA_NAME_IS_DEFAULT_DEF (retval)))
902 gimple_stmt_iterator psi;
904 /* See if there is PHI defining return value. */
905 for (psi = gsi_start_phis (return_bb);
906 !gsi_end_p (psi); gsi_next (&psi))
907 if (is_gimple_reg (gimple_phi_result (gsi_stmt (psi))))
910 /* When we have PHI, update PHI. When there is no PHI,
911 update the return statement itself. */
912 if (TREE_CODE (retval) == SSA_NAME)
914 retval = make_ssa_name (SSA_NAME_VAR (retval), call);
915 if (TREE_CODE (retval) == SSA_NAME
917 add_phi_arg (gsi_stmt (psi), retval, e, UNKNOWN_LOCATION);
918 else if (TREE_CODE (retval) == SSA_NAME)
920 gimple_return_set_retval (return_stmt, retval);
921 update_stmt (return_stmt);
924 gimple_call_set_lhs (call, retval);
926 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
/* RETURN_BB is the exit block: build a brand-new return after the call.  */
931 if (!VOID_TYPE_P (TREE_TYPE (TREE_TYPE (current_function_decl))))
933 retval = DECL_RESULT (current_function_decl);
935 /* We use temporary register to hold value when aggregate_value_p
936 is false. Similarly for DECL_BY_REFERENCE we must avoid extra
938 if (!aggregate_value_p (retval, TREE_TYPE (current_function_decl))
939 && !DECL_BY_REFERENCE (retval))
940 retval = create_tmp_reg (TREE_TYPE (retval), NULL);
941 if (is_gimple_reg (retval))
942 retval = make_ssa_name (retval, call);
943 gimple_call_set_lhs (call, retval);
945 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
946 ret = gimple_build_return (retval);
947 gsi_insert_after (&gsi, ret, GSI_NEW_STMT);
/* The CFG changed; dominator info is stale and inline parameters of the
   new node must be recomputed.  */
950 free_dominance_info (CDI_DOMINATORS);
951 free_dominance_info (CDI_POST_DOMINATORS);
952 compute_inline_parameters (node);
955 /* Execute function splitting pass.
   Runs the early disqualification checks, computes per-BB size/time
   estimates into bb_info_vec, searches for the best split point, and
   performs the split when one is found.  Returns the TODO flags.  */
958 execute_split_functions (void)
960 gimple_stmt_iterator bsi;
962 int overall_time = 0, overall_size = 0;
964 struct cgraph_node *node = cgraph_node (current_function_decl);
/* Splitting a noreturn function buys nothing: the call never returns.  */
966 if (flags_from_decl_or_type (current_function_decl) & ECF_NORETURN)
969 fprintf (dump_file, "Not splitting: noreturn function.\n");
972 if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
975 fprintf (dump_file, "Not splitting: main function.\n");
978 /* This can be relaxed; function might become inlinable after splitting
979 away the uninlinable part. */
980 if (!node->local.inlinable)
983 fprintf (dump_file, "Not splitting: not inlinable.\n");
986 if (node->local.disregard_inline_limits)
989 fprintf (dump_file, "Not splitting: disregading inline limits.\n");
992 /* This can be relaxed; most of versioning tests actually prevents
994 if (!tree_versionable_function_p (current_function_decl))
997 fprintf (dump_file, "Not splitting: not versionable.\n");
1000 /* FIXME: we could support this. */
1001 if (DECL_STRUCT_FUNCTION (current_function_decl)->static_chain_decl)
1004 fprintf (dump_file, "Not splitting: nested function.\n");
1007 /* FIXME: Should be easy to support. */
1008 if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
1011 fprintf (dump_file, "Not splitting: returns value by reference.\n");
1015 /* See if it makes sense to try to split.
1016 It makes sense to split if we inline, that is if we have direct calls to
1017 handle or direct calls are possibly going to appear as result of indirect
1019 Note that we are not completely conservative about disqualifying functions
1020 called once. It is possible that the caller is called more then once and
1021 then inlining would still benefit. */
1022 if ((!node->callers || !node->callers->next_caller)
1023 && !node->address_taken
1024 && ((!flag_lto && !flag_whopr) || !node->local.externally_visible))
1027 fprintf (dump_file, "Not splitting: not called directly "
1028 "or called once.\n");
1032 /* FIXME: We can actually split if splitting reduces call overhead. */
1033 if (!flag_inline_small_functions
1034 && !DECL_DECLARED_INLINE_P (current_function_decl))
1037 fprintf (dump_file, "Not splitting: not autoinlining and function"
1038 " is not inline.\n");
1042 /* Compute local info about basic blocks and determine function size/time. */
1043 VEC_safe_grow_cleared (bb_info, heap, bb_info_vec, last_basic_block + 1);
1044 memset (&best_split_point, 0, sizeof (best_split_point));
1049 int freq = compute_call_stmt_bb_frequency (current_function_decl, bb);
1051 if (dump_file && (dump_flags & TDF_DETAILS))
1052 fprintf (dump_file, "Basic block %i\n", bb->index);
/* Accumulate per-statement size and frequency-weighted time.  */
1054 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1056 int this_time, this_size;
1057 gimple stmt = gsi_stmt (bsi);
1059 this_size = estimate_num_insns (stmt, &eni_size_weights);
1060 this_time = estimate_num_insns (stmt, &eni_time_weights) * freq;
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1066 fprintf (dump_file, " freq:%6i size:%3i time:%3i ",
1067 freq, this_size, this_time);
1068 print_gimple_stmt (dump_file, stmt, 0, 0);
1071 overall_time += time;
1072 overall_size += size;
1073 VEC_index (bb_info, bb_info_vec, bb->index)->time = time;
1074 VEC_index (bb_info, bb_info_vec, bb->index)->size = size;
1076 find_split_points (overall_time, overall_size);
/* best_split_point.split_bbs is non-NULL only when a candidate was
   accepted by consider_split.  */
1077 if (best_split_point.split_bbs)
1079 split_function (&best_split_point);
1080 BITMAP_FREE (best_split_point.ssa_names_to_pass);
1081 BITMAP_FREE (best_split_point.split_bbs);
1082 todo = TODO_update_ssa | TODO_cleanup_cfg;
1084 VEC_free (bb_info, heap, bb_info_vec);
/* Gate of the function-splitting pass: run only when partial inlining
   is enabled (flag_partial_inlining).  */
1090 gate_split_functions (void)
1092 return flag_partial_inlining;
1095 struct gimple_opt_pass pass_split_functions =
1099 "fnsplit", /* name */
1100 gate_split_functions, /* gate */
1101 execute_split_functions, /* execute */
1104 0, /* static_pass_number */
1105 TV_IPA_FNSPLIT, /* tv_id */
1106 PROP_cfg, /* properties_required */
1107 0, /* properties_provided */
1108 0, /* properties_destroyed */
1109 0, /* todo_flags_start */
1110 TODO_dump_func /* todo_flags_finish */