X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=blobdiff_plain;f=gcc%2Fconfig%2Fi386%2Fi386.c;h=61a619bed2a7f1d91a320f4a1b889f30a49c150b;hp=b99586a52aecb631c7ddf3ea8c8be187b13aded4;hb=becaf513f11a73ca428070e42ef9970aba4eda3e;hpb=6fc76bb02fac69d05787ea749a9cf8230b59afed

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b99586a52ae..61a619bed2a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -819,93 +819,6 @@ struct processor_costs amdfam10_cost = {
   1,                                    /* cond_not_taken_branch_cost.  */
 };
 
-struct processor_costs bdver1_cost = {
-  COSTS_N_INSNS (1),                    /* cost of an add instruction */
-  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
-  COSTS_N_INSNS (1),                    /* variable shift costs */
-  COSTS_N_INSNS (1),                    /* constant shift costs */
-  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
-   COSTS_N_INSNS (4),                   /*                               HI */
-   COSTS_N_INSNS (3),                   /*                               SI */
-   COSTS_N_INSNS (4),                   /*                               DI */
-   COSTS_N_INSNS (5)},                  /*                            other */
-  0,                                    /* cost of multiply per each bit set */
-  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
-   COSTS_N_INSNS (35),                  /*                          HI */
-   COSTS_N_INSNS (51),                  /*                          SI */
-   COSTS_N_INSNS (83),                  /*                          DI */
-   COSTS_N_INSNS (83)},                 /*                          other */
-  COSTS_N_INSNS (1),                    /* cost of movsx */
-  COSTS_N_INSNS (1),                    /* cost of movzx */
-  8,                                    /* "large" insn */
-  9,                                    /* MOVE_RATIO */
-  4,                                    /* cost for loading QImode using movzbl */
-  {3, 4, 3},                            /* cost of loading integer registers
-                                           in QImode, HImode and SImode.
-                                           Relative to reg-reg move (2).  */
-  {3, 4, 3},                            /* cost of storing integer registers */
-  4,                                    /* cost of reg,reg fld/fst */
-  {4, 4, 12},                           /* cost of loading fp registers
-                                           in SFmode, DFmode and XFmode */
-  {6, 6, 8},                            /* cost of storing fp registers
-                                           in SFmode, DFmode and XFmode */
-  2,                                    /* cost of moving MMX register */
-  {3, 3},                               /* cost of loading MMX registers
-                                           in SImode and DImode */
-  {4, 4},                               /* cost of storing MMX registers
-                                           in SImode and DImode */
-  2,                                    /* cost of moving SSE register */
-  {4, 4, 3},                            /* cost of loading SSE registers
-                                           in SImode, DImode and TImode */
-  {4, 4, 5},                            /* cost of storing SSE registers
-                                           in SImode, DImode and TImode */
-  3,                                    /* MMX or SSE register to integer */
-  /* On K8
-      MOVD reg64, xmmreg Double FSTORE 4
-      MOVD reg32, xmmreg Double FSTORE 4
-     On AMDFAM10
-      MOVD reg64, xmmreg Double FADD 3
-                                      1/1  1/1
-      MOVD reg32, xmmreg Double FADD 3
-                                      1/1  1/1 */
-  64,                                   /* size of l1 cache.  */
-  1024,                                 /* size of l2 cache.  */
-  64,                                   /* size of prefetch block */
-  /* New AMD processors never drop prefetches; if they cannot be performed
-     immediately, they are queued.  We set number of simultaneous prefetches
-     to a large constant to reflect this (it probably is not a good idea not
-     to limit number of prefetches at all, as their execution also takes some
-     time).  */
-  100,                                  /* number of parallel prefetches */
-  2,                                    /* Branch cost */
-  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
-  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
-  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
-  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
-  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
-  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
-
-  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
-     very small blocks it is better to use loop. For large blocks, libcall can
-     do nontemporary accesses and beat inline considerably.  */
-  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
-   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
-  {{libcall, {{8, loop}, {24, unrolled_loop},
-              {2048, rep_prefix_4_byte}, {-1, libcall}}},
-   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
-  4,                                    /* scalar_stmt_cost.  */
-  2,                                    /* scalar load_cost.  */
-  2,                                    /* scalar_store_cost.  */
-  6,                                    /* vec_stmt_cost.  */
-  0,                                    /* vec_to_scalar_cost.  */
-  2,                                    /* scalar_to_vec_cost.  */
-  2,                                    /* vec_align_load_cost.  */
-  2,                                    /* vec_unalign_load_cost.  */
-  2,                                    /* vec_store_cost.  */
-  2,                                    /* cond_taken_branch_cost.  */
-  1,                                    /* cond_not_taken_branch_cost.  */
-};
-
 static const struct processor_costs pentium4_cost = {
   COSTS_N_INSNS (1),                    /* cost of an add instruction */
@@ -1363,8 +1276,7 @@ const struct processor_costs *ix86_cost = &pentium_cost;
 #define m_ATHLON (1< 1 && TARGET_64BIT)
-    flag_zee = 1;
-
   if (TARGET_MACHO)
     /* The Darwin libraries never set errno, so we might as well
       avoid calling them when that's the only reason we would.  */
@@ -7570,27 +7465,15 @@ standard_sse_constant_opcode (rtx insn, rtx x)
     case MODE_V4SF:
       return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
     case MODE_V2DF:
-      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-        return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
-      else
-        return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
+      return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
     case MODE_TI:
-      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-        return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
-      else
-        return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
+      return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
     case MODE_V8SF:
       return "vxorps\t%x0, %x0, %x0";
     case MODE_V4DF:
-      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-        return "vxorps\t%x0, %x0, %x0";
-      else
-        return "vxorpd\t%x0, %x0, %x0";
+      return "vxorpd\t%x0, %x0, %x0";
     case MODE_OI:
-      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-        return "vxorps\t%x0, %x0, %x0";
-      else
-        return "vpxor\t%x0, %x0, %x0";
+      return "vpxor\t%x0, %x0, %x0";
     default:
       break;
     }
@@ -10960,7 +10843,7 @@ output_pic_addr_const (FILE *file, rtx x, int code)
        {
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
-           fprintf (file, "0x%lx%08lx",
+           fprintf (file, "%#lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
@@ -11131,10 +11014,7 @@ ix86_delegitimize_address (rtx x)
          || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
          || !MEM_P (orig_x))
        return orig_x;
-      x = XVECEXP (XEXP (x, 0), 0, 0);
-      if (GET_MODE (orig_x) != Pmode)
-        return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
-      return x;
+      return XVECEXP (XEXP (x, 0), 0, 0);
     }
 
   if (GET_CODE (x) != PLUS
@@ -11201,8 +11081,6 @@ ix86_delegitimize_address (rtx x)
       else
        return orig_x;
     }
-  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
-    return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
   return result;
 }
 
@@ -12129,7 +12007,7 @@ print_operand (FILE *file, rtx x, int code)
 
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('$', file);
-         fprintf (file, "0x%08lx", (long unsigned int) l);
+         fprintf (file, "%#08lx", (long unsigned int) l);
        }
 
       /* These float cases don't actually occur as immediate operands.  */
@@ -13346,14 +13224,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
       switch (GET_MODE_SIZE (mode))
        {
        case 16:
-         /* If we're optimizing for size, movups is the smallest.  */
-         if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-           {
-             op0 = gen_lowpart (V4SFmode, op0);
-             op1 = gen_lowpart (V4SFmode, op1);
-             emit_insn (gen_avx_movups (op0, op1));
-             return;
-           }
         op0 = gen_lowpart (V16QImode, op0);
         op1 = gen_lowpart (V16QImode, op1);
         emit_insn (gen_avx_movdqu (op0, op1));
@@ -13380,13 +13250,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
         emit_insn (gen_avx_movups256 (op0, op1));
         break;
       case V2DFmode:
-         if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-           {
-             op0 = gen_lowpart (V4SFmode, op0);
-             op1 = gen_lowpart (V4SFmode, op1);
-             emit_insn (gen_avx_movups (op0, op1));
-             return;
-           }
         emit_insn (gen_avx_movupd (op0, op1));
         break;
       case V4DFmode:
@@ -13407,8 +13270,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
   if (MEM_P (op1))
     {
       /* If we're optimizing for size, movups is the smallest.  */
-      if (optimize_insn_for_size_p ()
-         || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+      if (optimize_insn_for_size_p ())
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
@@ -13431,13 +13293,13 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
        {
          rtx zero;
 
-         if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
-           {
-             op0 = gen_lowpart (V2DFmode, op0);
-             op1 = gen_lowpart (V2DFmode, op1);
-             emit_insn (gen_sse2_movupd (op0, op1));
-             return;
-           }
+         if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
+           {
+             op0 = gen_lowpart (V2DFmode, op0);
+             op1 = gen_lowpart (V2DFmode, op1);
+             emit_insn (gen_sse2_movupd (op0, op1));
+             return;
+           }
 
          /* When SSE registers are split into halves, we can avoid
             writing to the top half twice.  */
@@ -13466,12 +13328,12 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
        }
       else
        {
-         if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
-           {
-             op0 = gen_lowpart (V4SFmode, op0);
-             op1 = gen_lowpart (V4SFmode, op1);
-             emit_insn (gen_sse_movups (op0, op1));
-             return;
+         if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
+           {
+             op0 = gen_lowpart (V4SFmode, op0);
+             op1 = gen_lowpart (V4SFmode, op1);
+             emit_insn (gen_sse_movups (op0, op1));
+             return;
            }
 
          if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
@@ -13490,8 +13352,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
   else if (MEM_P (op0))
     {
       /* If we're optimizing for size, movups is the smallest.  */
-      if (optimize_insn_for_size_p ()
-         || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+      if (optimize_insn_for_size_p ())
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
@@ -13512,37 +13373,19 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 
       if (TARGET_SSE2 && mode == V2DFmode)
        {
-         if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
-           {
-             op0 = gen_lowpart (V2DFmode, op0);
-             op1 = gen_lowpart (V2DFmode, op1);
-             emit_insn (gen_sse2_movupd (op0, op1));
-           }
-         else
-           {
-             m = adjust_address (op0, DFmode, 0);
-             emit_insn (gen_sse2_storelpd (m, op1));
-             m = adjust_address (op0, DFmode, 8);
-             emit_insn (gen_sse2_storehpd (m, op1));
-           }
+         m = adjust_address (op0, DFmode, 0);
+         emit_insn (gen_sse2_storelpd (m, op1));
+         m = adjust_address (op0, DFmode, 8);
+         emit_insn (gen_sse2_storehpd (m, op1));
        }
       else
        {
         if (mode != V4SFmode)
           op1 = gen_lowpart (V4SFmode, op1);
-
-         if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
-           {
-             op0 = gen_lowpart (V4SFmode, op0);
-             emit_insn (gen_sse_movups (op0, op1));
-           }
-         else
-           {
-             m = adjust_address (op0, V2SFmode, 0);
-             emit_insn (gen_sse_storelps (m, op1));
-             m = adjust_address (op0, V2SFmode, 8);
-             emit_insn (gen_sse_storehps (m, op1));
-           }
+         m = adjust_address (op0, V2SFmode, 0);
+         emit_insn (gen_sse_storelps (m, op1));
+         m = adjust_address (op0, V2SFmode, 8);
+         emit_insn (gen_sse_storehps (m, op1));
        }
     }
   else
@@ -19862,7 +19705,6 @@ ix86_issue_rate (void)
     case PROCESSOR_NOCONA:
     case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
-    case PROCESSOR_BDVER1:
      return 3;
 
    case PROCESSOR_CORE2:
@@ -20052,7 +19894,6 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
-    case PROCESSOR_BDVER1:
    case PROCESSOR_ATOM:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
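
A note on the stringop_algs initializers deleted in the first hunk, for readers
who don't live in i386.c: each inner {max, alg} pair tells the memcpy/memset
expander which strategy to use for blocks of up to max bytes, and {-1, alg} is
the catch-all for anything larger (or of unknown size).  The standalone sketch
below illustrates only that table semantics; pick_alg and bdver1_memcpy are
invented names for this note, and GCC's real chooser (decide_alg in i386.c)
also weighs alignment, -Os, and whether the length is a compile-time constant.

#include <stdio.h>

/* Expansion strategies, using the names that appear in the cost table.  */
enum stringop_alg { libcall, loop, unrolled_loop,
                    rep_prefix_4_byte, rep_prefix_8_byte };

/* One size class: use ALG for block sizes up to MAX bytes;
   MAX == -1 terminates the table and catches everything else.  */
struct stringop_entry { long max; enum stringop_alg alg; };

/* Hypothetical lookup: return the first strategy whose size class
   covers COUNT.  */
static enum stringop_alg
pick_alg (const struct stringop_entry *table, long count)
{
  for (;; table++)
    if (table->max == -1 || count <= table->max)
      return table->alg;
}

int
main (void)
{
  /* The known-size 32-bit memcpy row from the removed bdver1 table:
     {{8, loop}, {24, unrolled_loop}, {2048, rep_prefix_4_byte}, {-1, libcall}}  */
  const struct stringop_entry bdver1_memcpy[] = {
    {8, loop}, {24, unrolled_loop}, {2048, rep_prefix_4_byte}, {-1, libcall}
  };

  printf ("%d\n", pick_alg (bdver1_memcpy, 16));    /* 2: unrolled_loop */
  printf ("%d\n", pick_alg (bdver1_memcpy, 4096));  /* 0: libcall */
  return 0;
}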
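
Likewise, the ix86_expand_vector_move_misalign hunks switch a misaligned V2DF
store between two code shapes: a single unaligned movupd when
TARGET_SSE_UNALIGNED_STORE_OPTIMAL is set, or a movlpd/movhpd pair of 8-byte
half stores otherwise.  Written with SSE2 intrinsics, the two shapes amount to
the sketch below (the function names are illustrative, not GCC's own):

#include <emmintrin.h>

/* One unaligned 16-byte store -- what gen_sse2_movupd expands to.  */
static void
store_v2df_unaligned (double *p, __m128d v)
{
  _mm_storeu_pd (p, v);        /* movupd */
}

/* Two 8-byte half stores -- what the gen_sse2_storelpd /
   gen_sse2_storehpd pair expands to.  On cores where unaligned
   16-byte stores are slow, the split form avoids the penalty.  */
static void
store_v2df_halves (double *p, __m128d v)
{
  _mm_storel_pd (p, v);        /* movlpd: low element to p[0] */
  _mm_storeh_pd (p + 1, v);    /* movhpd: high element to p[1] */
}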