From 2d93887b0dfd20fdaa76be712b7f1838a1cdcaab Mon Sep 17 00:00:00 2001 From: hubicka Date: Wed, 4 Jun 2003 16:42:23 +0000 Subject: [PATCH] * i386.c (min_insn_size, k8_avoid_jump_misspredicts): New functions (ix86_reorg): Use it. * i386.md (align): New insn pattern. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@67446 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 6 +++ gcc/config/i386/i386.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/config/i386/i386.md | 14 ++++++ 3 files changed, 134 insertions(+) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0adc19a1db7..96a0ab80a70 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +Wed Jun 4 18:39:33 CEST 2003 Jan Hubicka + + * i386.c (min_insn_size, k8_avoid_jump_misspredicts): New functions + (ix86_reorg): Use it. + * i386.md (align): New insn pattern. + 2003-06-04 Kaveh R. Ghazi * toplev.c (rest_of_type_compilation): Fix typo. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7386ba55131..c96ac8c68c7 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -878,6 +878,8 @@ static bool ix86_ms_bitfield_layout_p PARAMS ((tree)); static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *)); static int extended_reg_mentioned_1 PARAMS ((rtx *, void *)); static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *)); +static int min_insn_size PARAMS ((rtx)); +static void k8_avoid_jump_misspredicts PARAMS ((void)); #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION) static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int)); @@ -15526,6 +15528,117 @@ x86_function_profiler (file, labelno) } } +/* We don't have exact information about the insn sizes, but we may assume + quite safely that we are informed about all 1 byte insns and memory + address sizes. This is enough to eliminate unnecessary padding in + 99% of cases. 
*/
+
+static int
+min_insn_size (insn)
+     rtx insn;
+{
+  int l = 0;
+
+  /* Only real, active insns contribute to the estimate.  */
+  if (!INSN_P (insn) || !active_insn_p (insn))
+    return 0;
+
+  /* Discard the alignments we have emitted ourselves and the jump table
+     insns (ADDR_VEC / ADDR_DIFF_VEC).  */
+  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
+      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
+    return 0;
+  if (GET_CODE (insn) == JUMP_INSN
+      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
+          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
+    return 0;
+
+  /* Important case - calls are always 5 bytes.
+     It is common to have many calls in a row.  */
+  if (GET_CODE (insn) == CALL_INSN
+      && symbolic_reference_mentioned_p (PATTERN (insn))
+      && !SIBLING_CALL_P (insn))
+    return 5;
+  if (get_attr_length (insn) <= 1)
+    return 1;
+
+  /* For normal instructions we may rely on the sizes of addresses
+     and the presence of symbol to require 4 bytes of encoding.
+     This is not the case for jumps where references are PC relative.  */
+  if (GET_CODE (insn) != JUMP_INSN)
+    {
+      l = get_attr_length_address (insn);
+      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
+        l = 4;
+    }
+
+  /* One byte on top of the known address bytes; when no address size is
+     known, the insn is still longer than 1 byte (checked above), so
+     report 2.  */
+  if (l)
+    return 1+l;
+  else
+    return 2;
+}
+
+/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
+   window.  Walk the insn stream and emit an "align" insn in front of every
+   jump or call that could otherwise become the fourth one in such a
+   window.  Diagnostics go to RTL_DUMP_FILE when it is open.  */
+
+static void
+k8_avoid_jump_misspredicts ()
+{
+  rtx insn, start = get_insns ();
+  int nbytes = 0, njumps = 0;
+  int isjump = 0;
+
+  /* Look for all minimal intervals of instructions containing 4 jumps.
+     The intervals are bounded by START and INSN.  NBYTES is the total
+     size of instructions in the interval including INSN and not including
+     START.  When the NBYTES is smaller than 16 bytes, it is possible
+     that the end of START and INSN ends up in the same 16byte page.
+
+     The smallest offset in the page INSN can start is the case where START
+     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
+     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN),
+     which is the PADSIZE computed below.  */
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
+      /* Estimate the size once; it is needed for the running total, the
+         dump output and the padding computation.  */
+      int insn_size = min_insn_size (insn);
+
+      nbytes += insn_size;
+      if (rtl_dump_file)
+        fprintf (rtl_dump_file, "Insn %i estimated to %i bytes\n",
+                 INSN_UID (insn), insn_size);
+      if ((GET_CODE (insn) == JUMP_INSN
+           && GET_CODE (PATTERN (insn)) != ADDR_VEC
+           && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
+          || GET_CODE (insn) == CALL_INSN)
+        njumps++;
+      else
+        continue;
+
+      /* Shrink the interval from the front until it holds at most 3 jumps.
+         ISJUMP records whether the insn START stopped on is itself a jump
+         or call.  */
+      while (njumps > 3)
+        {
+          start = NEXT_INSN (start);
+          if ((GET_CODE (start) == JUMP_INSN
+               && GET_CODE (PATTERN (start)) != ADDR_VEC
+               && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
+              || GET_CODE (start) == CALL_INSN)
+            njumps--, isjump = 1;
+          else
+            isjump = 0;
+          nbytes -= min_insn_size (start);
+        }
+      if (njumps < 0)
+        abort ();
+      if (rtl_dump_file)
+        fprintf (rtl_dump_file, "Interval %i to %i has %i bytes\n",
+                 INSN_UID (start), INSN_UID (insn), nbytes);
+
+      if (njumps == 3 && isjump && nbytes < 16)
+        {
+          int padsize = 15 - nbytes + insn_size;
+
+          if (rtl_dump_file)
+            fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
+          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
+        }
+    }
+}
+
 /* Implement machine specific optimizations.
At the moment we implement single transformation: AMD Athlon works faster
    when RET is not destination of conditional jump or directly preceded
@@ -15577,6 +15690,7 @@ ix86_reorg ()
           delete_insn (ret);
         }
     }
+  k8_avoid_jump_misspredicts ();
 }
 
 /* Return nonzero when QImode register that must be represented via REX prefix
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d192cc8b1cf..65b54ea9050 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -126,6 +126,7 @@
    (UNSPECV_STMXCSR 40)
    (UNSPECV_FEMMS 46)
    (UNSPECV_CLFLUSH 57)
+   (UNSPECV_ALIGN 68)
   ])
 
 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
@@ -14278,6 +14279,24 @@
    (set_attr "modrm" "0")
    (set_attr "ppro_uops" "one")])
 
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory.  This blocks insns from being moved across this point.
+;; Operand 0 is printed as the third (max-skip) argument of .p2align.
+
+(define_insn "align"
+  [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_ALIGN)]
+  ""
+{
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+  return ".p2align\t4,,%c0";
+#else
+  /* No directive is emitted when HAVE_GAS_MAX_SKIP_P2ALIGN is not defined,
+     but the output function must still return a template string.  */
+  return "";
+#endif
+}
+  [(set_attr "length" "16")])
+
 (define_expand "prologue"
   [(const_int 1)]
   ""
-- 
2.11.0