OSDN Git Service

* invoke.texi (-malign-double): Re-add lost warning.
authorhubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>
Wed, 15 May 2002 09:00:30 +0000 (09:00 +0000)
committerhubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>
Wed, 15 May 2002 09:00:30 +0000 (09:00 +0000)
* i386-protos.h (x86_output_mi_thunk): Declare.
* unix.h (ASM_OUTPUT_MI_THUNK): Move offline to ...
* i386.c (x86_output_mi_thunk): ... here; handle 64bits.

* dwarf2out.c (output_call_frame_info): Do not skip unwind info
when flag_asynchronous_unwind_tables is set.

* flags.h (flag_reorder_functions): Declare.
* function.c (prepare_function_start): Initialize frequency.
* params.def (HOT_BB_COUNT_FRACTION, HOT_BB_FREQUENCY_FRACTION): New parameters.
* Makefile.in (predict.o): Add dependency on target.h and params.h
* defaults.h (HOT_TEXT_SECTION_NAME,
UNLIKELY_EXECUTED_TEXT_SECTION_NAME): New macros.
* predict.c (choose_function_section): New function.
(estimate_bb_frequencies): Use it.
* toplev.c (flag_reorder_functions): New global variable.
(lang_independent_options): New.
(parse_options_and_default_flags): Set.
* varasm.c (assemble_start_function): Bypass function alignment
for never executed functions.
* invoke.texi (-freorder-blocks, -freorder-functions): Document.
(param hot-bb-count-fraction, hot-bb-frequency-fraction): New.
* tm.texi (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME):
Document.

Thu Jan  3 21:52:09 CET 2002  Jan Hubicka  <jh@suse.cz>

* predict.c: Include profile.h
(MIN_COUNT): Rename to MIN_COUNT_FRACTION
(maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p):
Use the information about maximal counter in the program.

Thu Dec 20 22:14:00 CET 2001  Jan Hubicka  <jh@suse.cz>

* basic-block.h (maybe_hot_bb_p, probably_cold_bb_p,
probably_never_executed_bb_p): New functions.
* cfgcleanup.c (outgoing_edges_match): Use them.
* predict.c (MIN_COUNT, MIN_FREQUENCY): New macros.
(maybe_hot_bb_p, probably_cold_bb_p,
probably_never_executed_bb_p): New functions.

* function.h (function): Add new field function_frequency.
* predict.c (compute_function_frequency): New function.
(estimate_probability): Call it.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@53478 138bc75d-0d04-0410-961f-82ee72b054a4

18 files changed:
gcc/ChangeLog
gcc/Makefile.in
gcc/basic-block.h
gcc/cfgcleanup.c
gcc/config/i386/i386-protos.h
gcc/config/i386/i386.c
gcc/config/i386/unix.h
gcc/defaults.h
gcc/doc/invoke.texi
gcc/doc/tm.texi
gcc/dwarf2out.c
gcc/flags.h
gcc/function.c
gcc/function.h
gcc/params.def
gcc/predict.c
gcc/toplev.c
gcc/varasm.c

index 6a37fb5..f543a74 100644 (file)
@@ -1,3 +1,52 @@
+Wed May 15 10:38:27 CEST 2002  Jan Hubicka  <jh@suse.cz>
+
+       * invoke.texi (-malign-double): Re-add lost warning.
+
+       * i386-protos.h (x86_output_mi_thunk): Declare.
+       * unix.h (ASM_OUTPUT_MI_THUNK): Move offline to ...
+       * i386.c (x86_output_mi_thunk): ... here; handle 64bits.
+
+       * dwarf2out.c (output_call_frame_info): Do not skip unwind info
+       when flag_asynchronous_unwind_tables is set.
+
+       * flags.h (flag_reorder_functions): Declare.
+       * function.c (prepare_function_start): Initialize frequency.
+       * params.def (HOT_BB_COUNT_FRACTION, HOT_BB_FREQUENCY_FRACTION): New parameters.
+       * Makefile.in (predict.o): Add dependency on target.h and params.h
+       * defaults.h (HOT_TEXT_SECTION_NAME,
+       UNLIKELY_EXECUTED_TEXT_SECTION_NAME): New macros.
+       * predict.c (choose_function_section): New function.
+       (estimate_bb_frequencies): Use it.
+       * toplev.c (flag_reorder_functions): New global variable.
+       (lang_independent_options): New.
+       (parse_options_and_default_flags): Set.
+       * varasm.c (assemble_start_function): Bypass function alignment
+       for never executed functions.
+       * invoke.texi (-freorder-blocks, -freorder-functions): Document.
+       (param hot-bb-count-fraction, hot-bb-frequency-fraction): New.
+       * tm.texi (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME):
+       Document.
+
+       Thu Jan  3 21:52:09 CET 2002  Jan Hubicka  <jh@suse.cz>
+
+       * predict.c: Include profile.h
+       (MIN_COUNT): Rename to MIN_COUNT_FRACTION
+       (maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p):
+       Use the information about maximal counter in the program.
+
+       Thu Dec 20 22:14:00 CET 2001  Jan Hubicka  <jh@suse.cz>
+
+       * basic-block.h (maybe_hot_bb_p, probably_cold_bb_p,
+       probably_never_executed_bb_p): New functions.
+       * cfgcleanup.c (outgoing_edges_match): Use them.
+       * predict.c (MIN_COUNT, MIN_FREQUENCY): New macros.
+       (maybe_hot_bb_p, probably_cold_bb_p,
+       probably_never_executed_bb_p): New functions.
+
+       * function.h (function): Add new field function_frequency.
+       * predict.c (compute_function_frequency): New function.
+       (estimate_probability): Call it.
+
 2002-03-09  Jakub Jelinek  <jakub@redhat.com>
 
        PR optimization/5172, optimization/5200
index 937b6e5..5791e8e 100644 (file)
@@ -1579,7 +1579,8 @@ reg-stack.o : reg-stack.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) $(RECOG_H)
    varray.h function.h $(TM_P_H)
 predict.o: predict.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) flags.h \
    insn-config.h $(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h output.h toplev.h \
-   $(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) real.h
+   $(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) real.h \
+   $(PARAMS_H) $(TARGET_H)
 lists.o: lists.c $(CONFIG_H) $(SYSTEM_H) toplev.h $(RTL_H) $(GGC_H)
 bb-reorder.o : bb-reorder.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) \
    flags.h $(BASIC_BLOCK_H) hard-reg-set.h output.h cfglayout.h $(TARGET_H)
index 05b4b7c..5615b14 100644 (file)
@@ -628,6 +628,10 @@ extern rtx emit_block_insn_before  PARAMS ((rtx, rtx, basic_block));
 extern void estimate_probability        PARAMS ((struct loops *));
 extern void note_prediction_to_br_prob PARAMS ((void));
 extern void expected_value_to_br_prob  PARAMS ((void));
+extern void note_prediction_to_br_prob PARAMS ((void));
+extern bool maybe_hot_bb_p             PARAMS ((basic_block));
+extern bool probably_cold_bb_p         PARAMS ((basic_block));
+extern bool probably_never_executed_bb_p PARAMS ((basic_block));
 
 /* In flow.c */
 extern void init_flow                   PARAMS ((void));
index 826569a..fcf6944 100644 (file)
@@ -1211,8 +1211,8 @@ outgoing_edges_match (mode, bb1, bb2)
         roughly similar.  */
       if (match
          && !optimize_size
-         && bb1->frequency > BB_FREQ_MAX / 1000
-         && bb2->frequency > BB_FREQ_MAX / 1000)
+         && maybe_hot_bb_p (bb1)
+         && maybe_hot_bb_p (bb2))
        {
          int prob2;
 
index 8321d47..b3b1688 100644 (file)
@@ -197,4 +197,5 @@ extern tree ix86_handle_shared_attribute PARAMS ((tree *, tree, tree, int, bool
 extern unsigned int i386_pe_section_type_flags PARAMS ((tree, const char *,
                                                        int));
 extern void i386_pe_asm_named_section PARAMS ((const char *, unsigned int));
+extern void x86_output_mi_thunk PARAMS ((FILE *, int, tree));
 #endif
index 9c328b7..8d939f2 100644 (file)
@@ -13049,3 +13049,78 @@ x86_order_regs_for_local_alloc ()
    while (pos < FIRST_PSEUDO_REGISTER)
      reg_alloc_order [pos++] = 0;
 }
+
+void
+x86_output_mi_thunk (file, delta, function)
+     FILE *file;
+     int delta;
+     tree function;
+{
+  tree parm;
+  rtx xops[3];
+
+  if (ix86_regparm > 0)
+    parm = TYPE_ARG_TYPES (TREE_TYPE (function));
+  else
+    parm = NULL_TREE;
+  for (; parm; parm = TREE_CHAIN (parm))
+    if (TREE_VALUE (parm) == void_type_node)
+      break;
+
+  xops[0] = GEN_INT (delta);
+  if (TARGET_64BIT)
+    {
+      int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
+      xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
+      output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
+      if (flag_pic)
+       {
+         fprintf (file, "\tjmp *");
+         assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+         fprintf (file, "@GOTPCREL(%%rip)\n");
+       }
+      else
+       {
+         fprintf (file, "\tjmp ");
+         assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+         fprintf (file, "\n");
+       }
+    }
+  else
+    {
+      if (parm)
+       xops[1] = gen_rtx_REG (SImode, 0);
+      else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
+       xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
+      else
+       xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
+      output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
+
+      if (flag_pic)
+       {
+         xops[0] = pic_offset_table_rtx;
+         xops[1] = gen_label_rtx ();
+         xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+
+         if (ix86_regparm > 2)
+           abort ();
+         output_asm_insn ("push{l}\t%0", xops);
+         output_asm_insn ("call\t%P1", xops);
+         ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
+         output_asm_insn ("pop{l}\t%0", xops);
+         output_asm_insn
+           ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
+         xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
+         output_asm_insn
+           ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
+         asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
+         asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
+       }
+      else
+       {
+         fprintf (file, "\tjmp ");
+         assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+         fprintf (file, "\n");
+       }
+    }
+}
index 15a0701..f7e38b4 100644 (file)
@@ -79,57 +79,5 @@ Boston, MA 02111-1307, USA.  */
 
 /* Output code to add DELTA to the first argument, and then jump to FUNCTION.
    Used for C++ multiple inheritance.  */
-#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION)           \
-do {                                                                       \
-  tree parm;                                                               \
-  rtx xops[3];                                                             \
-                                                                           \
-  if (ix86_regparm > 0)                                                            \
-    parm = TYPE_ARG_TYPES (TREE_TYPE (function));                          \
-  else                                                                     \
-    parm = NULL_TREE;                                                      \
-  for (; parm; parm = TREE_CHAIN (parm))                                   \
-    if (TREE_VALUE (parm) == void_type_node)                               \
-      break;                                                               \
-                                                                           \
-  xops[0] = GEN_INT (DELTA);                                               \
-  if (parm)                                                                \
-    xops[1] = gen_rtx_REG (SImode, 0);                                     \
-  else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (FUNCTION))))           \
-    xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));   \
-  else                                                                     \
-    xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));   \
-  output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);                        \
-                                                                           \
-  if (flag_pic && !TARGET_64BIT)                                           \
-    {                                                                      \
-      xops[0] = pic_offset_table_rtx;                                      \
-      xops[1] = gen_label_rtx ();                                          \
-      xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");        \
-                                                                           \
-      if (ix86_regparm > 2)                                                \
-       abort ();                                                           \
-      output_asm_insn ("push{l}\t%0", xops);                               \
-      output_asm_insn ("call\t%P1", xops);                                 \
-      ASM_OUTPUT_INTERNAL_LABEL (FILE, "L", CODE_LABEL_NUMBER (xops[1]));   \
-      output_asm_insn ("pop{l}\t%0", xops);                                \
-      output_asm_insn ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops); \
-      xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (FUNCTION), 0));       \
-      output_asm_insn ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}",\
-                      xops);                                               \
-      asm_fprintf (FILE, "\tpop{l\t%%ebx|\t%%ebx}\n");                     \
-      asm_fprintf (FILE, "\tjmp\t{*%%ecx|%%ecx}\n");                       \
-    }                                                                      \
-  else if (flag_pic && TARGET_64BIT)                                       \
-    {                                                                      \
-      fprintf (FILE, "\tjmp *");                                           \
-      assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0));       \
-      fprintf (FILE, "@GOTPCREL(%%rip)\n");                                \
-    }                                                                      \
-  else                                                                     \
-    {                                                                      \
-      fprintf (FILE, "\tjmp ");                                                    \
-      assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0));       \
-      fprintf (FILE, "\n");                                                \
-    }                                                                      \
-} while (0)
+#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \
+    x86_output_mi_thunk (FILE, DELTA, FUNCTION);
index 7a45877..12f363a 100644 (file)
@@ -517,4 +517,12 @@ You Lose!  You must define PREFERRED_DEBUGGING_TYPE!
    && !ROUND_TOWARDS_ZERO)
 #endif
 
+#ifndef HOT_TEXT_SECTION_NAME
+#define HOT_TEXT_SECTION_NAME "text.hot"
+#endif
+
+#ifndef UNLIKELY_EXECUTED_TEXT_SECTION_NAME
+#define UNLIKELY_EXECUTED_TEXT_SECTION_NAME "text.unlikely"
+#endif
+
 #endif  /* ! GCC_DEFAULTS_H */
index 001ab25..1190c97 100644 (file)
@@ -278,6 +278,7 @@ in the following sections.
 -fomit-frame-pointer  -foptimize-register-move @gol
 -foptimize-sibling-calls  -fprefetch-loop-arrays @gol
 -freduce-all-givs -fregmove  -frename-registers @gol
+-freorder-blocks -freorder-functions @gol
 -frerun-cse-after-loop  -frerun-loop-opt @gol
 -fschedule-insns  -fschedule-insns2 @gol
 -fsingle-precision-constant  -fssa -fssa-ccp -fssa-dce @gol
@@ -3712,6 +3713,23 @@ non-determinism is of paramount import.  This switch allows users to
 reduce non-determinism, possibly at the expense of inferior
 optimization.
 
+@item -freorder-blocks
+@opindex freorder-blocks
+Reorder basic blocks in the compiled function in order to reduce number of
+taken branches and improve code locality.
+
+@item -freorder-functions
+@opindex freorder-functions
+Reorder functions in the object file in order to improve code locality.
+This is implemented by using special
+subsections @code{text.hot} for most frequently executed functions and
+@code{text.unlikely} for unlikely executed functions.  Reordering is done by
+the linker so object file format must support named sections and linker must
+place them in a reasonable way.
+
+Also profile feedback must be available to make this option effective.  See
+@option{-fprofile-arcs} for details.
+
 @item -fstrict-aliasing
 @opindex fstrict-aliasing
 Allows the compiler to assume the strictest aliasing rules applicable to
@@ -3900,6 +3918,13 @@ The maximum number of instructions that a loop should have if that loop
 is unrolled, and if the loop is unrolled, it determines how many times
 the loop code is unrolled.
 
+@item hot-bb-count-fraction
+Select fraction of the maximal count of repetitions of basic block in program
+given basic block needs to have to be considered hot.
+
+@item hot-bb-frequency-fraction
+Select fraction of the maximal frequency of executions of basic block in
+function given basic block needs to have to be considered hot.
 @end table
 @end table
 
@@ -7389,6 +7414,10 @@ boundary.  Aligning @code{double} variables on a two word boundary will
 produce code that runs somewhat faster on a @samp{Pentium} at the
 expense of more memory.
 
+@strong{Warning:} if you use the @samp{-malign-double} switch,
+structures containing the above types will be aligned differently than
+the published application binary interface specifications for the 386.
+
 @item -m128bit-long-double
 @opindex m128bit-long-double
 Control the size of @code{long double} type. i386 application binary interface
index 8d4e925..bbb2c50 100644 (file)
@@ -5651,6 +5651,17 @@ Normally this is not needed, as simply defining @code{TEXT_SECTION_ASM_OP}
 is enough.  The MIPS port uses this to sort all functions after all data
 declarations.
 
+@findex HOT_TEXT_SECTION_NAME
+@item HOT_TEXT_SECTION_NAME
+If defined, a C string constant for the name of the section containing most
+frequently executed functions of the program.  If not defined, GCC will provide
+a default definition if the target supports named sections.
+
+@findex UNLIKELY_EXECUTED_TEXT_SECTION_NAME
+@item UNLIKELY_EXECUTED_TEXT_SECTION_NAME
+If defined, a C string constant for the name of the section containing unlikely
+executed functions in the program.
+
 @findex DATA_SECTION_ASM_OP
 @item DATA_SECTION_ASM_OP
 A C expression whose value is a string, including spacing, containing the
index 47edc43..2fa9f64 100644 (file)
@@ -1968,7 +1968,8 @@ output_call_frame_info (for_eh)
       fde = &fde_table[i];
 
       /* Don't emit EH unwind info for leaf functions that don't need it.  */
-      if (for_eh && fde->nothrow && ! fde->uses_eh_lsda)
+      if (!flag_asynchronous_unwind_tables && for_eh && fde->nothrow
+         && !  fde->uses_eh_lsda)
        continue;
 
       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, FDE_LABEL, for_eh + i * 2);
index b9fca23..efcc771 100644 (file)
@@ -204,6 +204,10 @@ extern int flag_branch_probabilities;
 
 extern int flag_reorder_blocks;
 
+/* Nonzero if functions should be reordered.  */
+
+extern int flag_reorder_functions;
+
 /* Nonzero if registers should be renamed.  */
 
 extern int flag_rename_registers;
index 9f1c00a..5bd70a0 100644 (file)
@@ -6320,6 +6320,10 @@ prepare_function_start ()
 
   cfun->arc_profile = profile_arc_flag || flag_test_coverage;
 
+  cfun->arc_profile = profile_arc_flag || flag_test_coverage;
+
+  cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
+
   (*lang_hooks.function.init) (cfun);
   if (init_machine_status)
     (*init_machine_status) (cfun);
index bc789c8..912f851 100644 (file)
@@ -481,6 +481,19 @@ struct function
 
   /* Nonzero if code to initialize arg_pointer_save_area has been emited.  */
   unsigned int arg_pointer_save_area_init : 1;
+
+  /* How commonly executed the function is.  Initialized during branch
+     probabilities pass.  */
+  enum function_frequency {
+    /* This function most likely won't be executed at all.
+       (set only when profile feedback is available).  */
+    FUNCTION_FREQUENCY_UNLIKELY_EXECUTED,
+    /* The default value.  */
+    FUNCTION_FREQUENCY_NORMAL,
+    /* Optimize this function hard
+       (set only when profile feedback is available).  */
+    FUNCTION_FREQUENCY_HOT
+  } function_frequency;
 };
 
 /* The function currently being compiled.  */
index 2b2cfe6..de55ecc 100644 (file)
@@ -150,6 +150,15 @@ DEFPARAM(PARAM_MAX_UNROLLED_INSNS,
         "max-unrolled-insns",
         "The maximum number of instructions to consider to unroll in a loop",
         100)
+
+DEFPARAM(HOT_BB_COUNT_FRACTION,
+        "hot-bb-count-fraction",
+        "Select fraction of the maximal count of repetitions of basic block in program given basic block needs to have to be considered hot",
+        10000)
+DEFPARAM(HOT_BB_FREQUENCY_FRACTION,
+        "hot-bb-frequency-fraction",
+        "Select fraction of the maximal frequency of executions of basic block in function given basic block needs to have to be considered hot",
+        1000)
 /*
 Local variables:
 mode:c
index 5896c10..f457817 100644 (file)
@@ -45,7 +45,10 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include "recog.h"
 #include "expr.h"
 #include "predict.h"
+#include "profile.h"
 #include "real.h"
+#include "params.h"
+#include "target.h"
 
 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 0.5,
                    REAL_BB_FREQ_MAX.  */
@@ -75,6 +78,8 @@ static void process_note_predictions   PARAMS ((basic_block, int *, int *,
 static void process_note_prediction     PARAMS ((basic_block, int *, int *,
                                                   sbitmap *, int, int));
 static bool last_basic_block_p           PARAMS ((basic_block));
+static void compute_function_frequency  PARAMS ((void));
+static void choose_function_section     PARAMS ((void));
 
 /* Information we hold about each branch predictor.
    Filled using information from predict.def.  */
@@ -103,6 +108,54 @@ static const struct predictor_info predictor_info[]= {
   {NULL, 0, 0}
 };
 #undef DEF_PREDICTOR
+
+/* Return true in case BB can be CPU intensive and should be optimized
+   for maximal performance.  */
+
+bool
+maybe_hot_bb_p (bb)
+     basic_block bb;
+{
+  if (profile_info.count_profiles_merged
+      && flag_branch_probabilities
+      && (bb->count
+         < profile_info.max_counter_in_program
+         / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
+    return false;
+  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
+    return false;
+  return true;
+}
+
+/* Return true in case BB is cold and should be optimized for size.  */
+
+bool
+probably_cold_bb_p (bb)
+     basic_block bb;
+{
+  if (profile_info.count_profiles_merged
+      && flag_branch_probabilities
+      && (bb->count
+         < profile_info.max_counter_in_program
+         / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
+    return true;
+  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
+    return true;
+  return false;
+}
+
+/* Return true in case BB is probably never executed.  */
+bool
+probably_never_executed_bb_p (bb)
+       basic_block bb;
+{
+  if (profile_info.count_profiles_merged
+      && flag_branch_probabilities)
+    return ((bb->count + profile_info.count_profiles_merged / 2)
+           / profile_info.count_profiles_merged) == 0;
+  return false;
+}
+
 /* Return true if the one of outgoing edges is already predicted by
    PREDICTOR.  */
 
@@ -1095,118 +1148,159 @@ estimate_bb_frequencies (loops)
   REAL_VALUE_TYPE freq_max;
   enum machine_mode double_mode = TYPE_MODE (double_type_node);
 
-  REAL_VALUE_FROM_INT (real_zero, 0, 0, double_mode);
-  REAL_VALUE_FROM_INT (real_one, 1, 0, double_mode);
-  REAL_VALUE_FROM_INT (real_br_prob_base, REG_BR_PROB_BASE, 0, double_mode);
-  REAL_VALUE_FROM_INT (real_bb_freq_max, BB_FREQ_MAX, 0, double_mode);
-  REAL_VALUE_FROM_INT (real_one_half, 2, 0, double_mode);
+  if (flag_branch_probabilities)
+    counts_to_freqs ();
+  else
+    {
+      REAL_VALUE_FROM_INT (real_zero, 0, 0, double_mode);
+      REAL_VALUE_FROM_INT (real_one, 1, 0, double_mode);
+      REAL_VALUE_FROM_INT (real_br_prob_base, REG_BR_PROB_BASE, 0, double_mode);
+      REAL_VALUE_FROM_INT (real_bb_freq_max, BB_FREQ_MAX, 0, double_mode);
+      REAL_VALUE_FROM_INT (real_one_half, 2, 0, double_mode);
 
-  REAL_ARITHMETIC (real_one_half, RDIV_EXPR, real_one, real_one_half);
+      REAL_ARITHMETIC (real_one_half, RDIV_EXPR, real_one, real_one_half);
 
-  REAL_ARITHMETIC (real_almost_one, RDIV_EXPR, real_one, real_br_prob_base);
-  REAL_ARITHMETIC (real_almost_one, MINUS_EXPR, real_one, real_almost_one);
+      REAL_ARITHMETIC (real_almost_one, RDIV_EXPR, real_one, real_br_prob_base);
+      REAL_ARITHMETIC (real_almost_one, MINUS_EXPR, real_one, real_almost_one);
 
-  mark_dfs_back_edges ();
-  if (flag_branch_probabilities)
-    {
-      counts_to_freqs ();
-      return;
-    }
+      mark_dfs_back_edges ();
+      /* Fill in the probability values in flowgraph based on the REG_BR_PROB
+         notes.  */
+      for (i = 0; i < n_basic_blocks; i++)
+       {
+         rtx last_insn = BLOCK_END (i);
 
-  /* Fill in the probability values in flowgraph based on the REG_BR_PROB
-     notes.  */
-  for (i = 0; i < n_basic_blocks; i++)
-    {
-      rtx last_insn = BLOCK_END (i);
+         if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn)
+             /* Avoid handling of conditional jumps jumping to fallthru edge.  */
+             || BASIC_BLOCK (i)->succ->succ_next == NULL)
+           {
+             /* We can predict only conditional jumps at the moment.
+                Expect each edge to be equally probable.
+                ?? In the future we want to make abnormal edges improbable.  */
+             int nedges = 0;
+             edge e;
 
-      if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn)
-         /* Avoid handling of conditional jumps jumping to fallthru edge.  */
-         || BASIC_BLOCK (i)->succ->succ_next == NULL)
+             for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
+               {
+                 nedges++;
+                 if (e->probability != 0)
+                   break;
+               }
+             if (!e)
+               for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
+                 e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
+           }
+       }
+
+      ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;
+
+      /* Set up block info for each basic block.  */
+      alloc_aux_for_blocks (sizeof (struct block_info_def));
+      alloc_aux_for_edges (sizeof (struct edge_info_def));
+      for (i = -2; i < n_basic_blocks; i++)
        {
-         /* We can predict only conditional jumps at the moment.
-            Expect each edge to be equally probable.
-            ?? In the future we want to make abnormal edges improbable.  */
-         int nedges = 0;
          edge e;
+         basic_block bb;
 
-         for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
+         if (i == -2)
+           bb = ENTRY_BLOCK_PTR;
+         else if (i == -1)
+           bb = EXIT_BLOCK_PTR;
+         else
+           bb = BASIC_BLOCK (i);
+
+         BLOCK_INFO (bb)->tovisit = 0;
+         for (e = bb->succ; e; e = e->succ_next)
            {
-             nedges++;
-             if (e->probability != 0)
-               break;
+
+             REAL_VALUE_FROM_INT (EDGE_INFO (e)->back_edge_prob,
+                                  e->probability, 0, double_mode);
+             REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob,
+                              RDIV_EXPR, EDGE_INFO (e)->back_edge_prob,
+                              real_br_prob_base);
            }
-         if (!e)
-           for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
-             e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
        }
-    }
 
-  ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;
+      /* First compute probabilities locally for each loop from innermost
+         to outermost to examine probabilities for back edges.  */
+      estimate_loops_at_level (loops->tree_root);
 
-  /* Set up block info for each basic block.  */
-  alloc_aux_for_blocks (sizeof (struct block_info_def));
-  alloc_aux_for_edges (sizeof (struct edge_info_def));
-  for (i = -2; i < n_basic_blocks; i++)
-    {
-      edge e;
-      basic_block bb;
+      /* Now fake loop around whole function to finalize probabilities.  */
+      for (i = 0; i < n_basic_blocks; i++)
+       BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1;
 
-      if (i == -2)
-       bb = ENTRY_BLOCK_PTR;
-      else if (i == -1)
-       bb = EXIT_BLOCK_PTR;
-      else
-       bb = BASIC_BLOCK (i);
+      BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1;
+      BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1;
+      propagate_freq (ENTRY_BLOCK_PTR);
 
-      BLOCK_INFO (bb)->tovisit = 0;
-      for (e = bb->succ; e; e = e->succ_next)
+      memcpy (&freq_max, &real_zero, sizeof (real_zero));
+      for (i = 0; i < n_basic_blocks; i++)
+       if (REAL_VALUES_LESS
+           (freq_max, BLOCK_INFO (BASIC_BLOCK (i))->frequency))
+         memcpy (&freq_max, &BLOCK_INFO (BASIC_BLOCK (i))->frequency,
+                 sizeof (freq_max));
+
+      for (i = -2; i < n_basic_blocks; i++)
        {
-       
-         REAL_VALUE_FROM_INT (EDGE_INFO (e)->back_edge_prob,
-                              e->probability, 0, double_mode);
-         REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob,
-                          RDIV_EXPR, EDGE_INFO (e)->back_edge_prob,
-                          real_br_prob_base);
-       }
-    }
+         basic_block bb;
+         REAL_VALUE_TYPE tmp;
 
-  /* First compute probabilities locally for each loop from innermost
-     to outermost to examine probabilities for back edges.  */
-  estimate_loops_at_level (loops->tree_root);
+         if (i == -2)
+           bb = ENTRY_BLOCK_PTR;
+         else if (i == -1)
+           bb = EXIT_BLOCK_PTR;
+         else
+           bb = BASIC_BLOCK (i);
 
-  /* Now fake loop around whole function to finalize probabilities.  */
-  for (i = 0; i < n_basic_blocks; i++)
-    BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1;
+         REAL_ARITHMETIC (tmp, MULT_EXPR, BLOCK_INFO (bb)->frequency,
+                          real_bb_freq_max);
+         REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, freq_max);
+         REAL_ARITHMETIC (tmp, PLUS_EXPR, tmp, real_one_half);
+         bb->frequency = REAL_VALUE_UNSIGNED_FIX (tmp);
+       }
 
-  BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1;
-  BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1;
-  propagate_freq (ENTRY_BLOCK_PTR);
+      free_aux_for_blocks ();
+      free_aux_for_edges ();
+    }
+  compute_function_frequency ();
+  if (flag_reorder_functions)
+    choose_function_section ();
+}
 
-  memcpy (&freq_max, &real_zero, sizeof (real_zero));
+/* Decide whether function is hot, cold or unlikely executed.  */
+static void
+compute_function_frequency ()
+{
+  int i;
+  if (!profile_info.count_profiles_merged
+      || !flag_branch_probabilities)
+    return;
+  cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
   for (i = 0; i < n_basic_blocks; i++)
-    if (REAL_VALUES_LESS (freq_max, BLOCK_INFO (BASIC_BLOCK (i))->frequency))
-      memcpy (&freq_max, &BLOCK_INFO (BASIC_BLOCK (i))->frequency,
-             sizeof (freq_max));
-
-  for (i = -2; i < n_basic_blocks; i++)
     {
-      basic_block bb;
-      REAL_VALUE_TYPE tmp;
-
-      if (i == -2)
-       bb = ENTRY_BLOCK_PTR;
-      else if (i == -1)
-       bb = EXIT_BLOCK_PTR;
-      else
-       bb = BASIC_BLOCK (i);
-
-      REAL_ARITHMETIC (tmp, MULT_EXPR, BLOCK_INFO (bb)->frequency,
-                      real_bb_freq_max);
-      REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, freq_max);
-      REAL_ARITHMETIC (tmp, PLUS_EXPR, tmp, real_one_half);
-      bb->frequency = REAL_VALUE_UNSIGNED_FIX (tmp);
+      basic_block bb = BASIC_BLOCK (i);
+      if (maybe_hot_bb_p (bb))
+       {
+         cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
+         return;
+       }
+      if (!probably_never_executed_bb_p (bb))
+       cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
     }
+}
 
-  free_aux_for_blocks ();
-  free_aux_for_edges ();
+/* Choose appropriate section for the function.  */
+static void
+choose_function_section ()
+{
+  if (DECL_SECTION_NAME (current_function_decl)
+      || !targetm.have_named_sections)
+    return;
+  if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
+    DECL_SECTION_NAME (current_function_decl) =
+      build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
+  if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
+    DECL_SECTION_NAME (current_function_decl) =
+      build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
+                   UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
 }
index 9213730..5f5eb46 100644 (file)
@@ -381,6 +381,10 @@ int flag_branch_probabilities = 0;
 
 int flag_reorder_blocks = 0;
 
+/* Nonzero if functions should be reordered.  */
+
+int flag_reorder_functions = 0;
+
 /* Nonzero if registers should be renamed.  */
 
 int flag_rename_registers = 0;
@@ -1076,6 +1080,8 @@ static const lang_independent_options f_options[] =
    N_("Enable basic program profiling code") },
   {"reorder-blocks", &flag_reorder_blocks, 1,
    N_("Reorder basic blocks to improve code placement") },
+  {"reorder-functions", &flag_reorder_functions, 1,
+   N_("Reorder functions to improve code placement") },
   {"rename-registers", &flag_rename_registers, 1,
    N_("Do the register renaming optimization pass") },
   {"cprop-registers", &flag_cprop_registers, 1,
@@ -4657,6 +4663,7 @@ parse_options_and_default_flags (argc, argv)
       flag_strict_aliasing = 1;
       flag_delete_null_pointer_checks = 1;
       flag_reorder_blocks = 1;
+      flag_reorder_functions = 1;
     }
 
   if (optimize >= 3)
index 279dc90..6fb663e 100644 (file)
@@ -1197,7 +1197,8 @@ assemble_start_function (decl, fnname)
   /* Handle a user-specified function alignment.
      Note that we still need to align to FUNCTION_BOUNDARY, as above,
      because ASM_OUTPUT_MAX_SKIP_ALIGN might not do any alignment at all.  */
-  if (align_functions_log > align)
+  if (align_functions_log > align
+      && cfun->function_frequency != FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
     {
 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
       ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file,