#include "recog.h"
#include "expr.h"
#include "optabs.h"
+#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "cselib.h"
#include "debug.h"
#include "dwarf2out.h"
-
+#include "sched-int.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
COSTS_N_BYTES (2), /* variable shift costs */
COSTS_N_BYTES (3), /* constant shift costs */
{COSTS_N_BYTES (3), /* cost of starting multiply for QI */
- COSTS_N_BYTES (3), /* HI */
- COSTS_N_BYTES (3), /* SI */
- COSTS_N_BYTES (3), /* DI */
- COSTS_N_BYTES (5)}, /* other */
+ COSTS_N_BYTES (3), /* HI */
+ COSTS_N_BYTES (3), /* SI */
+ COSTS_N_BYTES (3), /* DI */
+ COSTS_N_BYTES (5)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
- COSTS_N_BYTES (3), /* HI */
- COSTS_N_BYTES (3), /* SI */
- COSTS_N_BYTES (3), /* DI */
- COSTS_N_BYTES (5)}, /* other */
+ COSTS_N_BYTES (3), /* HI */
+ COSTS_N_BYTES (3), /* SI */
+ COSTS_N_BYTES (3), /* DI */
+ COSTS_N_BYTES (5)}, /* other */
COSTS_N_BYTES (3), /* cost of movsx */
COSTS_N_BYTES (3), /* cost of movzx */
0, /* "large" insn */
2, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
+ 2, /* cost for loading QImode using movzbl */
{2, 2, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
{{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 1, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 1, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 1, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 1, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
/* Processor costs (relative to an add) */
COSTS_N_INSNS (3), /* variable shift costs */
COSTS_N_INSNS (2), /* constant shift costs */
{COSTS_N_INSNS (6), /* cost of starting multiply for QI */
- COSTS_N_INSNS (6), /* HI */
- COSTS_N_INSNS (6), /* SI */
- COSTS_N_INSNS (6), /* DI */
- COSTS_N_INSNS (6)}, /* other */
+ COSTS_N_INSNS (6), /* HI */
+ COSTS_N_INSNS (6), /* SI */
+ COSTS_N_INSNS (6), /* DI */
+ COSTS_N_INSNS (6)}, /* other */
COSTS_N_INSNS (1), /* cost of multiply per each bit set */
{COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (23), /* HI */
- COSTS_N_INSNS (23), /* SI */
- COSTS_N_INSNS (23), /* DI */
- COSTS_N_INSNS (23)}, /* other */
+ COSTS_N_INSNS (23), /* HI */
+ COSTS_N_INSNS (23), /* SI */
+ COSTS_N_INSNS (23), /* DI */
+ COSTS_N_INSNS (23)}, /* other */
COSTS_N_INSNS (3), /* cost of movsx */
COSTS_N_INSNS (2), /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
DUMMY_STRINGOP_ALGS},
{{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
COSTS_N_INSNS (3), /* variable shift costs */
COSTS_N_INSNS (2), /* constant shift costs */
{COSTS_N_INSNS (12), /* cost of starting multiply for QI */
- COSTS_N_INSNS (12), /* HI */
- COSTS_N_INSNS (12), /* SI */
- COSTS_N_INSNS (12), /* DI */
- COSTS_N_INSNS (12)}, /* other */
+ COSTS_N_INSNS (12), /* HI */
+ COSTS_N_INSNS (12), /* SI */
+ COSTS_N_INSNS (12), /* DI */
+ COSTS_N_INSNS (12)}, /* other */
1, /* cost of multiply per each bit set */
{COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (40), /* HI */
- COSTS_N_INSNS (40), /* SI */
- COSTS_N_INSNS (40), /* DI */
- COSTS_N_INSNS (40)}, /* other */
+ COSTS_N_INSNS (40), /* HI */
+ COSTS_N_INSNS (40), /* SI */
+ COSTS_N_INSNS (40), /* DI */
+ COSTS_N_INSNS (40)}, /* other */
COSTS_N_INSNS (3), /* cost of movsx */
COSTS_N_INSNS (2), /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
DUMMY_STRINGOP_ALGS},
{{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
COSTS_N_INSNS (4), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (11), /* cost of starting multiply for QI */
- COSTS_N_INSNS (11), /* HI */
- COSTS_N_INSNS (11), /* SI */
- COSTS_N_INSNS (11), /* DI */
- COSTS_N_INSNS (11)}, /* other */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (11), /* SI */
+ COSTS_N_INSNS (11), /* DI */
+ COSTS_N_INSNS (11)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (25), /* HI */
- COSTS_N_INSNS (25), /* SI */
- COSTS_N_INSNS (25), /* DI */
- COSTS_N_INSNS (25)}, /* other */
+ COSTS_N_INSNS (25), /* HI */
+ COSTS_N_INSNS (25), /* SI */
+ COSTS_N_INSNS (25), /* DI */
+ COSTS_N_INSNS (25)}, /* other */
COSTS_N_INSNS (3), /* cost of movsx */
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
- 6, /* cost for loading QImode using movzbl */
+ 6, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
DUMMY_STRINGOP_ALGS},
{{libcall, {{-1, rep_prefix_4_byte}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (4), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (4), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (4)}, /* other */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (4), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (4)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (17), /* HI */
- COSTS_N_INSNS (17), /* SI */
- COSTS_N_INSNS (17), /* DI */
- COSTS_N_INSNS (17)}, /* other */
+ COSTS_N_INSNS (17), /* HI */
+ COSTS_N_INSNS (17), /* SI */
+ COSTS_N_INSNS (17), /* DI */
+ COSTS_N_INSNS (17)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
+ 2, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
- /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
- the alignment). For small blocks inline loop is still a noticeable win, for bigger
- blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
- more expensive startup time in CPU, but after 4K the difference is down in the noise.
- */
+ /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
+ (we ensure the alignment). For small blocks an inline loop is still a
+ noticeable win; for bigger blocks either rep movsl or rep movsb is the
+ way to go. Rep movsb apparently has a more expensive startup time in the
+ CPU, but after 4K the difference is down in the noise. */
{{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
DUMMY_STRINGOP_ALGS},
{{rep_prefix_4_byte, {{1024, unrolled_loop},
- {8192, rep_prefix_4_byte}, {-1, libcall}}},
+ {8192, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
COSTS_N_INSNS (2), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (7), /* SI */
- COSTS_N_INSNS (7), /* DI */
- COSTS_N_INSNS (7)}, /* other */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (7), /* SI */
+ COSTS_N_INSNS (7), /* DI */
+ COSTS_N_INSNS (7)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (23), /* HI */
- COSTS_N_INSNS (39), /* SI */
- COSTS_N_INSNS (39), /* DI */
- COSTS_N_INSNS (39)}, /* other */
+ COSTS_N_INSNS (23), /* HI */
+ COSTS_N_INSNS (39), /* SI */
+ COSTS_N_INSNS (39), /* DI */
+ COSTS_N_INSNS (39)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
4, /* MOVE_RATIO */
- 1, /* cost for loading QImode using movzbl */
+ 1, /* cost for loading QImode using movzbl */
{1, 1, 1}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
DUMMY_STRINGOP_ALGS},
{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (3), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (3), /* DI */
- COSTS_N_INSNS (3)}, /* other */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (18), /* HI */
- COSTS_N_INSNS (18), /* SI */
- COSTS_N_INSNS (18), /* DI */
- COSTS_N_INSNS (18)}, /* other */
+ COSTS_N_INSNS (18), /* HI */
+ COSTS_N_INSNS (18), /* SI */
+ COSTS_N_INSNS (18), /* DI */
+ COSTS_N_INSNS (18)}, /* other */
COSTS_N_INSNS (2), /* cost of movsx */
COSTS_N_INSNS (2), /* cost of movzx */
8, /* "large" insn */
4, /* MOVE_RATIO */
- 3, /* cost for loading QImode using movzbl */
+ 3, /* cost for loading QImode using movzbl */
{4, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
DUMMY_STRINGOP_ALGS},
{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (5), /* cost of starting multiply for QI */
- COSTS_N_INSNS (5), /* HI */
- COSTS_N_INSNS (5), /* SI */
- COSTS_N_INSNS (5), /* DI */
- COSTS_N_INSNS (5)}, /* other */
+ COSTS_N_INSNS (5), /* HI */
+ COSTS_N_INSNS (5), /* SI */
+ COSTS_N_INSNS (5), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
DUMMY_STRINGOP_ALGS},
{{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (5)}, /* other */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
COSTS_N_INSNS (2), /* cost of FABS instruction. */
COSTS_N_INSNS (2), /* cost of FCHS instruction. */
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
- /* K8 has optimized REP instruction for medium sized blocks, but for very small
- blocks it is better to use loop. For large blocks, libcall can do
- nontemporary accesses and beat inline considerably. */
+ /* K8 has an optimized REP instruction for medium-sized blocks, but for
+ very small blocks it is better to use a loop. For large blocks, a libcall
+ can do non-temporal accesses and beat inline code considerably. */
{{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
{libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
{{libcall, {{8, loop}, {24, unrolled_loop},
{2048, rep_prefix_4_byte}, {-1, libcall}}},
{libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 4, /* scalar_stmt_cost. */
- 2, /* scalar load_cost. */
- 2, /* scalar_store_cost. */
- 5, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 2, /* vec_align_load_cost. */
- 3, /* vec_unalign_load_cost. */
- 3, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 2, /* cond_not_taken_branch_cost. */
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar_load_cost. */
+ 2, /* scalar_store_cost. */
+ 5, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 3, /* vec_unalign_load_cost. */
+ 3, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 2, /* cond_not_taken_branch_cost. */
};
struct processor_costs amdfam10_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (35), /* HI */
- COSTS_N_INSNS (51), /* SI */
- COSTS_N_INSNS (83), /* DI */
- COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{4, 4, 5}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
- /* On K8
- MOVD reg64, xmmreg Double FSTORE 4
- MOVD reg32, xmmreg Double FSTORE 4
- On AMDFAM10
- MOVD reg64, xmmreg Double FADD 3
- 1/1 1/1
- MOVD reg32, xmmreg Double FADD 3
- 1/1 1/1 */
+ /* On K8:
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10:
+ MOVD reg64, xmmreg Double FADD 3
+ 1/1 1/1
+ MOVD reg32, xmmreg Double FADD 3
+ 1/1 1/1 */
64, /* size of l1 cache. */
512, /* size of l2 cache. */
64, /* size of prefetch block */
{{libcall, {{8, loop}, {24, unrolled_loop},
{2048, rep_prefix_4_byte}, {-1, libcall}}},
{libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 4, /* scalar_stmt_cost. */
- 2, /* scalar load_cost. */
- 2, /* scalar_store_cost. */
- 6, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 2, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 2, /* vec_store_cost. */
- 2, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar_load_cost. */
+ 2, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 2, /* vec_store_cost. */
+ 2, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
struct processor_costs bdver1_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (35), /* HI */
- COSTS_N_INSNS (51), /* SI */
- COSTS_N_INSNS (83), /* DI */
- COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{4, 4, 5}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
- /* On K8
- MOVD reg64, xmmreg Double FSTORE 4
- MOVD reg32, xmmreg Double FSTORE 4
- On AMDFAM10
- MOVD reg64, xmmreg Double FADD 3
- 1/1 1/1
- MOVD reg32, xmmreg Double FADD 3
- 1/1 1/1 */
+ /* On K8:
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10:
+ MOVD reg64, xmmreg Double FADD 3
+ 1/1 1/1
+ MOVD reg32, xmmreg Double FADD 3
+ 1/1 1/1 */
64, /* size of l1 cache. */
1024, /* size of l2 cache. */
64, /* size of prefetch block */
COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
/* BDVER1 has optimized REP instruction for medium sized blocks, but for
- very small blocks it is better to use loop. For large blocks, libcall can
- do nontemporary accesses and beat inline considerably. */
+ very small blocks it is better to use a loop. For large blocks, a libcall
+ can do non-temporal accesses and beat inline code considerably. */
{{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
{libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
{{libcall, {{8, loop}, {24, unrolled_loop},
{2048, rep_prefix_4_byte}, {-1, libcall}}},
{libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 4, /* scalar_stmt_cost. */
- 2, /* scalar load_cost. */
- 2, /* scalar_store_cost. */
- 6, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 2, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 2, /* vec_store_cost. */
- 2, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar_load_cost. */
+ 2, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 2, /* vec_store_cost. */
+ 2, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
COSTS_N_INSNS (4), /* variable shift costs */
COSTS_N_INSNS (4), /* constant shift costs */
{COSTS_N_INSNS (15), /* cost of starting multiply for QI */
- COSTS_N_INSNS (15), /* HI */
- COSTS_N_INSNS (15), /* SI */
- COSTS_N_INSNS (15), /* DI */
- COSTS_N_INSNS (15)}, /* other */
+ COSTS_N_INSNS (15), /* HI */
+ COSTS_N_INSNS (15), /* SI */
+ COSTS_N_INSNS (15), /* DI */
+ COSTS_N_INSNS (15)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (56), /* HI */
- COSTS_N_INSNS (56), /* SI */
- COSTS_N_INSNS (56), /* DI */
- COSTS_N_INSNS (56)}, /* other */
+ COSTS_N_INSNS (56), /* HI */
+ COSTS_N_INSNS (56), /* SI */
+ COSTS_N_INSNS (56), /* DI */
+ COSTS_N_INSNS (56)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
16, /* "large" insn */
6, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
+ 2, /* cost for loading QImode using movzbl */
{4, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
{-1, libcall}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (10), /* cost of starting multiply for QI */
- COSTS_N_INSNS (10), /* HI */
- COSTS_N_INSNS (10), /* SI */
- COSTS_N_INSNS (10), /* DI */
- COSTS_N_INSNS (10)}, /* other */
+ COSTS_N_INSNS (10), /* HI */
+ COSTS_N_INSNS (10), /* SI */
+ COSTS_N_INSNS (10), /* DI */
+ COSTS_N_INSNS (10)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (66), /* HI */
- COSTS_N_INSNS (66), /* SI */
- COSTS_N_INSNS (66), /* DI */
- COSTS_N_INSNS (66)}, /* other */
+ COSTS_N_INSNS (66), /* HI */
+ COSTS_N_INSNS (66), /* SI */
+ COSTS_N_INSNS (66), /* DI */
+ COSTS_N_INSNS (66)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
16, /* "large" insn */
17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{-1, libcall}}},
{libcall, {{24, loop}, {64, unrolled_loop},
{8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (3), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (3), /* DI */
- COSTS_N_INSNS (3)}, /* other */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (22), /* HI */
- COSTS_N_INSNS (22), /* SI */
- COSTS_N_INSNS (22), /* DI */
- COSTS_N_INSNS (22)}, /* other */
+ COSTS_N_INSNS (22), /* HI */
+ COSTS_N_INSNS (22), /* SI */
+ COSTS_N_INSNS (22), /* DI */
+ COSTS_N_INSNS (22)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
16, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
+ 2, /* cost for loading QImode using movzbl */
{6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2048, rep_prefix_4_byte}, {-1, libcall}}},
{libcall, {{24, loop}, {32, unrolled_loop},
{8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
+ 2, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
{{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
{libcall, {{32, loop}, {64, rep_prefix_4_byte},
- {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
{{libcall, {{8, loop}, {15, unrolled_loop},
- {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
{libcall, {{24, loop}, {32, unrolled_loop},
- {8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
/* Generic64 should produce code tuned for Nocona and K8. */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
512, /* size of l2 cache. */
64, /* size of prefetch block */
6, /* number of parallel prefetches */
- /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
- is increased to perhaps more appropriate value of 5. */
+ /* Benchmarks show large regressions on K8 sixtrack benchmark when this
+ value is increased to the perhaps more appropriate value of 5. */
3, /* Branch cost */
COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
COSTS_N_INSNS (8), /* cost of FMUL instruction. */
{libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
{DUMMY_STRINGOP_ALGS,
{libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
-/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
+/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
+ Athlon and K8. */
static const
struct processor_costs generic32_cost = {
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (1), /* variable shift costs */
COSTS_N_INSNS (1), /* constant shift costs */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
0, /* cost of multiply per each bit set */
{COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
+ 4, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
DUMMY_STRINGOP_ALGS},
{{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
const struct processor_costs *ix86_cost = &pentium_cost;
/* X86_TUNE_PROMOTE_HI_REGS */
m_PPRO,
- /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
- m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
- | m_CORE2 | m_GENERIC,
+ /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
+ over esp addition. */
+ m_386 | m_486 | m_PENT | m_PPRO,
- /* X86_TUNE_ADD_ESP_8 */
- m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
- | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
+ over esp addition. */
+ m_PENT,
- /* X86_TUNE_SUB_ESP_4 */
- m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
- | m_GENERIC,
+ /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
+ over esp subtraction. */
+ m_386 | m_486 | m_PENT | m_K6_GEODE,
- /* X86_TUNE_SUB_ESP_8 */
- m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
- | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
+ over esp subtraction. */
+ m_PENT | m_K6_GEODE,
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
for DFmode copies */
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
will impact LEA instruction selection. */
m_ATOM,
+
+ /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
+ instructions. */
+ ~m_ATOM,
};
/* Feature tests against the various architecture variations. */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
-/* Test and compare insns in i386.md store the information needed to
- generate branch and scc insns here. */
-
-rtx ix86_compare_op0 = NULL_RTX;
-rtx ix86_compare_op1 = NULL_RTX;
-
/* Define parameter passing and return registers. */
static int const x86_64_int_parameter_registers[6] =
Stack grows downward:
[arguments]
- <- ARG_POINTER
+ <- ARG_POINTER
saved pc
- saved frame pointer if frame_pointer_needed
- <- HARD_FRAME_POINTER
- [saved regs]
+ saved static chain if ix86_static_chain_on_stack
+ saved frame pointer if frame_pointer_needed
+ <- HARD_FRAME_POINTER
+ [saved regs]
+ <- regs_save_offset
[padding0]
[saved SSE regs]
-
- [padding1] \
- )
- [va_arg registers] (
- > to_allocate <- FRAME_POINTER
- [frame] (
- )
- [padding2] /
+ <- sse_regs_save_offset
+ [padding1] |
+ | <- FRAME_POINTER
+ [va_arg registers] |
+ |
+ [frame] |
+ |
+ [padding2] | = to_allocate
+ <- STACK_POINTER
*/
struct ix86_frame
{
- int padding0;
int nsseregs;
int nregs;
- int padding1;
int va_arg_size;
- HOST_WIDE_INT frame;
- int padding2;
- int outgoing_arguments_size;
int red_zone_size;
+ int outgoing_arguments_size;
+ HOST_WIDE_INT frame;
- HOST_WIDE_INT to_allocate;
/* The offsets relative to ARG_POINTER. */
HOST_WIDE_INT frame_pointer_offset;
HOST_WIDE_INT hard_frame_pointer_offset;
HOST_WIDE_INT stack_pointer_offset;
+ HOST_WIDE_INT reg_save_offset;
+ HOST_WIDE_INT sse_reg_save_offset;
/* When save_regs_using_mov is set, emit prologue using
move instead of push instructions. */
= "force_align_arg_pointer";
static rtx (*ix86_gen_leave) (void);
-static rtx (*ix86_gen_pop1) (rtx);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
+static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
+static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
+static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
/* Preferred alignment for stack boundary in bits. */
unsigned int ix86_preferred_stack_boundary;
rtx, rtx, int);
static void ix86_add_new_builtins (int);
static rtx ix86_expand_vec_perm_builtin (tree);
+static tree ix86_canonical_va_list_type (tree);
enum ix86_function_specific_strings
{
#define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
#define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
+#define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
+#define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA_F16C_SET \
+ (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
+
/* Define a set of ISAs which aren't available when a given ISA is
disabled. MMX and SSE ISAs are handled separately. */
(OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
#define OPTION_MASK_ISA_AVX_UNSET \
(OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
- | OPTION_MASK_ISA_FMA4_UNSET)
+ | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
#define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
#define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
+#define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
+#define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
+
/* Vectorization library interface and handlers. */
-tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
+static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
+
static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
"bdver1"
};
\f
+/* Return true if a red-zone is in use. */
+
+static inline bool
+ix86_using_red_zone (void)
+{
+ return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
+}
+
/* Implement TARGET_HANDLE_OPTION. */
static bool
}
return true;
+ case OPT_mfsgsbase:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
+ }
+ return true;
+
+ case OPT_mrdrnd:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
+ }
+ return true;
+
+ case OPT_mf16c:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
+ }
+ return true;
+
default:
return true;
}
{ "-mcrc32", OPTION_MASK_ISA_CRC32 },
{ "-maes", OPTION_MASK_ISA_AES },
{ "-mpclmul", OPTION_MASK_ISA_PCLMUL },
+ { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
+ { "-mrdrnd", OPTION_MASK_ISA_RDRND },
+ { "-mf16c", OPTION_MASK_ISA_F16C },
};
/* Flag options. */
return ret;
}
+/* Return TRUE if software prefetching is beneficial for the
+ given CPU. */
+
+static bool
+software_prefetching_beneficial_p (void)
+{
+ switch (ix86_tune)
+ {
+ case PROCESSOR_GEODE:
+ case PROCESSOR_K6:
+ case PROCESSOR_ATHLON:
+ case PROCESSOR_K8:
+ case PROCESSOR_AMDFAM10:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Return true if profiling code should be emitted before the
+ prologue, and false otherwise.
+ Note: for x86, the combination with "hotfix" is reported via sorry (). */
+static bool
+ix86_profile_before_prologue (void)
+{
+ return flag_fentry != 0;
+}
+
/* Function that is callable from the debugger to print the current
options. */
void
{
int i;
unsigned int ix86_arch_mask, ix86_tune_mask;
- const bool ix86_tune_specified = (ix86_tune_string != NULL);
+ const bool ix86_tune_specified = (ix86_tune_string != NULL);
const char *prefix;
const char *suffix;
const char *sw;
PTA_MOVBE = 1 << 20,
PTA_FMA4 = 1 << 21,
PTA_XOP = 1 << 22,
- PTA_LWP = 1 << 23
+ PTA_LWP = 1 << 23,
+ PTA_FSGSBASE = 1 << 24,
+ PTA_RDRND = 1 << 25,
+ PTA_F16C = 1 << 26
};
static struct pta
{"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
- | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
| PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
{"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
0 /* flags are only used for -march switch. */ },
if (TARGET_MACHO && TARGET_64BIT)
flag_pic = 2;
- /* Set the default values for switches whose default depends on TARGET_64BIT
- in case they weren't overwritten by command line options. */
- if (TARGET_64BIT)
- {
- /* Mach-O doesn't support omitting the frame pointer for now. */
- if (flag_omit_frame_pointer == 2)
- flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
- if (flag_asynchronous_unwind_tables == 2)
- flag_asynchronous_unwind_tables = 1;
- if (flag_pcc_struct_return == 2)
- flag_pcc_struct_return = 0;
- }
- else
- {
- if (flag_omit_frame_pointer == 2)
- flag_omit_frame_pointer = 0;
- if (flag_asynchronous_unwind_tables == 2)
- flag_asynchronous_unwind_tables = 0;
- if (flag_pcc_struct_return == 2)
- flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
- }
-
/* Need to check -mtune=generic first. */
if (ix86_tune_string)
{
if (processor_alias_table[i].flags & PTA_PCLMUL
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
+ if (processor_alias_table[i].flags & PTA_FSGSBASE
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
+ ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
+ if (processor_alias_table[i].flags & PTA_RDRND
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
+ ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
+ if (processor_alias_table[i].flags & PTA_F16C
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
+ ix86_isa_flags |= OPTION_MASK_ISA_F16C;
if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
x86_prefetch_sse = true;
for (i = 0; i < X86_TUNE_LAST; ++i)
ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
+#ifndef USE_IX86_FRAME_POINTER
+#define USE_IX86_FRAME_POINTER 0
+#endif
+
+ /* Set the default values for switches whose default depends on TARGET_64BIT
+ in case they weren't overwritten by command line options. */
+ if (TARGET_64BIT)
+ {
+ if (flag_zee == 2)
+ flag_zee = 1;
+ if (flag_omit_frame_pointer == 2)
+ flag_omit_frame_pointer = 1;
+ if (flag_asynchronous_unwind_tables == 2)
+ flag_asynchronous_unwind_tables = 1;
+ if (flag_pcc_struct_return == 2)
+ flag_pcc_struct_return = 0;
+ }
+ else
+ {
+ if (flag_zee == 2)
+ flag_zee = 0;
+ if (flag_omit_frame_pointer == 2)
+ flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
+ if (flag_asynchronous_unwind_tables == 2)
+ flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
+ if (flag_pcc_struct_return == 2)
+ flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
+ }
+
if (optimize_size)
ix86_cost = &ix86_size_cost;
else
prefix, suffix, sw);
}
- if ((x86_accumulate_outgoing_args & ix86_tune_mask)
+ if ((!USE_IX86_FRAME_POINTER
+ || (x86_accumulate_outgoing_args & ix86_tune_mask))
&& !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
&& !optimize_size)
target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
+ /* Enable sw prefetching at -O3 for CPUs where prefetching is helpful. */
+ if (flag_prefetch_loop_arrays < 0
+ && HAVE_prefetch
+ && optimize >= 3
+ && software_prefetching_beneficial_p ())
+ flag_prefetch_loop_arrays = 1;
+
/* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
can be optimized to ap = __builtin_next_arg (0). */
if (!TARGET_64BIT)
if (TARGET_64BIT)
{
ix86_gen_leave = gen_leave_rex64;
- ix86_gen_pop1 = gen_popdi1;
ix86_gen_add3 = gen_adddi3;
ix86_gen_sub3 = gen_subdi3;
ix86_gen_sub3_carry = gen_subdi3_carry;
ix86_gen_one_cmpl2 = gen_one_cmpldi2;
ix86_gen_monitor = gen_sse3_monitor64;
ix86_gen_andsp = gen_anddi3;
+ ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64;
+ ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
+ ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
}
else
{
ix86_gen_leave = gen_leave;
- ix86_gen_pop1 = gen_popsi1;
ix86_gen_add3 = gen_addsi3;
ix86_gen_sub3 = gen_subsi3;
ix86_gen_sub3_carry = gen_subsi3_carry;
ix86_gen_one_cmpl2 = gen_one_cmplsi2;
ix86_gen_monitor = gen_sse3_monitor;
ix86_gen_andsp = gen_andsi3;
+ ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32;
+ ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
+ ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
}
#ifdef USE_IX86_CLD
target_flags |= MASK_CLD & ~target_flags_explicit;
#endif
+ if (!TARGET_64BIT && flag_pic)
+ {
+ if (flag_fentry > 0)
+ sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
+ flag_fentry = 0;
+ }
+ if (flag_fentry < 0)
+ {
+#if defined(PROFILE_BEFORE_PROLOGUE)
+ flag_fentry = 1;
+#else
+ flag_fentry = 0;
+#endif
+ }
+
/* Save the initial options in case the user does function specific options */
if (main_args_p)
target_option_default_node = target_option_current_node
IX86_ATTR_ISA ("fma4", OPT_mfma4),
IX86_ATTR_ISA ("xop", OPT_mxop),
IX86_ATTR_ISA ("lwp", OPT_mlwp),
+ IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
+ IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
+ IX86_ATTR_ISA ("f16c", OPT_mf16c),
/* string options */
IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
/* We don't split these for medium model. Place them into
default sections and hope for best. */
break;
- case SECCAT_EMUTLS_VAR:
- case SECCAT_EMUTLS_TMPL:
- gcc_unreachable ();
}
if (sname)
{
/* We don't split these for medium model. Place them into
default sections and hope for best. */
break;
- case SECCAT_EMUTLS_VAR:
- prefix = targetm.emutls.var_section;
- break;
- case SECCAT_EMUTLS_TMPL:
- prefix = targetm.emutls.tmpl_section;
- break;
}
if (prefix)
{
name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
name = targetm.strip_name_encoding (name);
-
+
/* If we're using one_only, then there needs to be a .gnu.linkonce
prefix to the section name. */
linkonce = one_only ? ".gnu.linkonce" : "";
-
+
string = ACONCAT ((linkonce, prefix, ".", name, NULL));
-
+
DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
return;
}
flag_schedule_insns = 0;
#endif
- /* For -O2 and beyond, turn on -fzee for x86_64 target. */
- if (level > 1 && TARGET_64BIT)
- flag_zee = 1;
-
if (TARGET_MACHO)
/* The Darwin libraries never set errno, so we might as well
avoid calling them when that's the only reason we would. */
specifying them, we will set the defaults in override_options. */
if (optimize >= 1)
flag_omit_frame_pointer = 2;
+
+ /* For -O2 and beyond, mark -fzee as defaulted; it is enabled later for x86_64 only. */
+ if (level > 1)
+ flag_zee = 2;
+
flag_pcc_struct_return = 2;
flag_asynchronous_unwind_tables = 2;
flag_vect_cost_model = 1;
SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}
+
+/* Decide whether we must probe the stack before any space allocation
+ on this target. It's essentially TARGET_STACK_PROBE except when
+ -fstack-check causes the stack to be already probed differently. */
+
+bool
+ix86_target_stack_probe (void)
+{
+ /* Do not probe the stack twice if static stack checking is enabled. */
+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+ return false;
+
+ return TARGET_STACK_PROBE;
+}
\f
/* Decide whether we can make a sibling call to a function. DECL is the
declaration of the function being targeted by the call and EXP is the
if (decl
&& TREE_CODE (decl) == FUNCTION_DECL
&& optimize
- && !profile_flag)
+ && !(profile_flag && !flag_fentry))
{
/* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
/* For local functions, pass up to SSE_REGPARM_MAX SFmode
(and DFmode for SSE2) arguments in SSE registers. */
- if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
+ if (decl && TARGET_SSE_MATH && optimize
+ && !(profile_flag && !flag_fentry))
{
/* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
The attribute stdcall is equivalent to RTD on a per module basis. */
-int
+static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
int rtd;
}
static bool
-ix86_function_ms_hook_prologue (const_tree fntype)
+ix86_function_ms_hook_prologue (const_tree fn)
{
- if (!TARGET_64BIT)
+ if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
{
- if (lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fntype)))
- {
- if (decl_function_context (fntype) != NULL_TREE)
- {
- error_at (DECL_SOURCE_LOCATION (fntype),
- "ms_hook_prologue is not compatible with nested function");
- }
-
- return true;
- }
+ if (decl_function_context (fn) != NULL_TREE)
+ error_at (DECL_SOURCE_LOCATION (fn),
+ "ms_hook_prologue is not compatible with nested function");
+ else
+ return true;
}
return false;
}
return cfun->machine->call_abi;
}
+/* Write the extra assembler code needed to declare a function properly. */
+
+void
+ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
+ tree decl)
+{
+ bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
+
+ if (is_ms_hook)
+ {
+ int i, filler_count = (TARGET_64BIT ? 32 : 16);
+ unsigned int filler_cc = 0xcccccccc;
+
+ for (i = 0; i < filler_count; i += 4)
+ fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
+ }
+
+ ASM_OUTPUT_LABEL (asm_out_file, fname);
+
+ /* Output magic byte marker, if hot-patch attribute is set. */
+ if (is_ms_hook)
+ {
+ if (TARGET_64BIT)
+ {
+ /* leaq [%rsp + 0], %rsp */
+ asm_fprintf (asm_out_file, ASM_BYTE
+ "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
+ }
+ else
+ {
+ /* movl.s %edi, %edi
+ push %ebp
+ movl.s %esp, %ebp */
+ asm_fprintf (asm_out_file, ASM_BYTE
+ "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
+ }
+ }
+}
+
/* regclass.c */
extern void init_regs (void);
NULL. */
static enum machine_mode
-type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
+type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
enum machine_mode mode = TYPE_MODE (type);
static bool warnedavx;
if (cum
- && !warnedavx
+ && !warnedavx
&& cum->warn_avx)
{
warnedavx = true;
{
case RECORD_TYPE:
/* And now merge the fields of structure. */
- for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
{
if (TREE_CODE (field) == FIELD_DECL)
{
== NULL_TREE))
{
static bool warned;
-
+
if (!warned && warn_psabi)
{
warned = true;
case QUAL_UNION_TYPE:
/* Unions are similar to RECORD_TYPE but offset is always 0.
*/
- for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
{
if (TREE_CODE (field) == FIELD_DECL)
{
case X86_64_SSESF_CLASS:
case X86_64_SSEDF_CLASS:
if (mode != BLKmode)
- return gen_reg_or_parallel (mode, orig_mode,
+ return gen_reg_or_parallel (mode, orig_mode,
SSE_REGNO (sse_regno));
break;
case X86_64_X87_CLASS:
static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
+ const_tree type, HOST_WIDE_INT bytes,
+ HOST_WIDE_INT words)
{
switch (mode)
{
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- tree type, HOST_WIDE_INT words, int named)
+ const_tree type, HOST_WIDE_INT words, bool named)
{
int int_nregs, sse_nregs;
if (!named && VALID_AVX256_REG_MODE (mode))
return;
- if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
- cum->words += words;
- else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
+ if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
+ && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
{
cum->nregs -= int_nregs;
cum->sse_nregs -= sse_nregs;
cum->sse_regno += sse_nregs;
}
else
- cum->words += words;
+ {
+ int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
+ cum->words = (cum->words + align - 1) & ~(align - 1);
+ cum->words += words;
+ }
}
static void
}
}
-void
-function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- tree type, int named)
+/* Update the data in CUM to advance over an argument of mode MODE and
+ data type TYPE. (TYPE is null for libcalls where that information
+ may not be available.) */
+
+static void
+ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
{
HOST_WIDE_INT bytes, words;
(otherwise it is an extra parameter matching an ellipsis). */
static rtx
-function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- enum machine_mode orig_mode, tree type,
+function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ enum machine_mode orig_mode, const_tree type,
HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
static bool warnedsse, warnedmmx;
}
static rtx
-function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- enum machine_mode orig_mode, tree type, int named)
+function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ enum machine_mode orig_mode, const_tree type, bool named)
{
/* Handle a hidden AL argument containing number of registers
for varargs x86-64 functions. */
}
static rtx
-function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- enum machine_mode orig_mode, int named,
+function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ enum machine_mode orig_mode, bool named,
HOST_WIDE_INT bytes)
{
unsigned int regno;
return gen_reg_or_parallel (mode, orig_mode, regno);
}
-rtx
-function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
- tree type, int named)
+/* Return where to put the arguments to a function.
+ Return zero to push the argument on the stack, or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode. TYPE is the data type of the
+ argument. It is null for libcalls where that information may not be
+ available. CUM gives information about the preceding args and about
+ the function being called. NAMED is nonzero if this argument is a
+ named parameter (otherwise it is an extra parameter matching an
+ ellipsis). */
+
+static rtx
+ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
+ const_tree type, bool named)
{
enum machine_mode mode = omode;
HOST_WIDE_INT bytes, words;
/* Return true when TYPE should be 128bit aligned for 32bit argument passing
ABI. */
static bool
-contains_aligned_value_p (tree type)
+contains_aligned_value_p (const_tree type)
{
enum machine_mode mode = TYPE_MODE (type);
if (((TARGET_SSE && SSE_REG_MODE_P (mode))
tree field;
/* Walk all the structure fields. */
- for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
{
if (TREE_CODE (field) == FIELD_DECL
&& contains_aligned_value_p (TREE_TYPE (field)))
specified mode and type. */
int
-ix86_function_arg_boundary (enum machine_mode mode, tree type)
+ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
{
int align;
if (type)
{
- /* Since canonical type is used for call, we convert it to
- canonical type if needed. */
- if (!TYPE_STRUCTURAL_EQUALITY_P (type))
- type = TYPE_CANONICAL (type);
+ /* Since the main variant type is used for the call, convert TYPE to
+ its main variant before reading its alignment. */
+ type = TYPE_MAIN_VARIANT (type);
align = TYPE_ALIGN (type);
}
else
/* Return true iff type is returned in memory. */
-static int ATTRIBUTE_UNUSED
+static bool ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
HOST_WIDE_INT size;
if (mode == BLKmode)
- return 1;
+ return true;
size = int_size_in_bytes (type);
if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
- return 0;
+ return false;
if (VECTOR_MODE_P (mode) || mode == TImode)
{
/* User-created vectors small enough to fit in EAX. */
if (size < 8)
- return 0;
+ return false;
/* MMX/3dNow values are returned in MM0,
except when it doesn't exits. */
if (size == 8)
- return (TARGET_MMX ? 0 : 1);
+ return !TARGET_MMX;
/* SSE values are returned in XMM0, except when it doesn't exist. */
if (size == 16)
- return (TARGET_SSE ? 0 : 1);
+ return !TARGET_SSE;
/* AVX values are returned in YMM0, except when it doesn't exist. */
if (size == 32)
- return TARGET_AVX ? 0 : 1;
+ return !TARGET_AVX;
}
if (mode == XFmode)
- return 0;
+ return false;
if (size > 12)
- return 1;
+ return true;
/* OImode shouldn't be used directly. */
gcc_assert (mode != OImode);
- return 0;
+ return false;
}
-static int ATTRIBUTE_UNUSED
+static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
int needed_intregs, needed_sseregs;
return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}
-static int ATTRIBUTE_UNUSED
+static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
HOST_WIDE_INT size = int_size_in_bytes (type);
/* __m128 is returned in xmm0. */
if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
&& !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
- return 0;
+ return false;
/* Otherwise, the size must be exactly in [1248]. */
- return (size != 1 && size != 2 && size != 4 && size != 8);
+ return size != 1 && size != 2 && size != 4 && size != 8;
}
static bool
return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
const enum machine_mode mode = type_natural_mode (type, NULL);
-
+
if (TARGET_64BIT)
{
if (ix86_function_type_abi (fntype) == MS_ABI)
}
/* Return false iff TYPE is returned in memory. This version is used
- on Solaris 10. It is similar to the generic ix86_return_in_memory,
+ on Solaris 2. It is similar to the generic ix86_return_in_memory,
but differs notably in that when MMX is available, 8-byte vectors
are returned in memory, rather than in MMX registers. */
bool
-ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
int size;
enum machine_mode mode = type_natural_mode (type, NULL);
if (!TARGET_64BIT || abi == MS_ABI)
return build_pointer_type (char_type_node);
- record = (*lang_hooks.types.make_type) (RECORD_TYPE);
+ record = lang_hooks.types.make_type (RECORD_TYPE);
type_decl = build_decl (BUILTINS_LOCATION,
TYPE_DECL, get_identifier ("__va_list_tag"), record);
TREE_CHAIN (record) = type_decl;
TYPE_NAME (record) = type_decl;
TYPE_FIELDS (record) = f_gpr;
- TREE_CHAIN (f_gpr) = f_fpr;
- TREE_CHAIN (f_fpr) = f_ovf;
- TREE_CHAIN (f_ovf) = f_sav;
+ DECL_CHAIN (f_gpr) = f_fpr;
+ DECL_CHAIN (f_fpr) = f_ovf;
+ DECL_CHAIN (f_ovf) = f_sav;
layout_type (record);
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
rtx save_area, mem;
- rtx label;
- rtx tmp_reg;
- rtx nsse_reg;
alias_set_type set;
- int i;
+ int i, max;
/* GPR size of varargs save area. */
if (cfun->va_list_gpr_size)
/* FPR size of varargs save area. We don't need it if we don't pass
anything in SSE registers. */
- if (cum->sse_nregs && cfun->va_list_fpr_size)
+ if (TARGET_SSE && cfun->va_list_fpr_size)
ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
else
ix86_varargs_fpr_size = 0;
save_area = frame_pointer_rtx;
set = get_varargs_alias_set ();
- for (i = cum->regno;
- i < X86_64_REGPARM_MAX
- && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
- i++)
+ max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
+ if (max > X86_64_REGPARM_MAX)
+ max = X86_64_REGPARM_MAX;
+
+ for (i = cum->regno; i < max; i++)
{
mem = gen_rtx_MEM (Pmode,
plus_constant (save_area, i * UNITS_PER_WORD));
if (ix86_varargs_fpr_size)
{
+ enum machine_mode smode;
+ rtx label, test;
+
/* Now emit code to save SSE registers. The AX parameter contains number
- of SSE parameter registers used to call this function. We use
- sse_prologue_save insn template that produces computed jump across
- SSE saves. We need some preparation work to get this working. */
+ of SSE parameter registers used to call this function, though all we
+ actually check here is the zero/non-zero status. */
label = gen_label_rtx ();
+ test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
+ emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
+ label));
- nsse_reg = gen_reg_rtx (Pmode);
- emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
-
- /* Compute address of memory block we save into. We always use pointer
- pointing 127 bytes after first byte to store - this is needed to keep
- instruction size limited by 4 bytes (5 bytes for AVX) with one
- byte displacement. */
- tmp_reg = gen_reg_rtx (Pmode);
- emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
- plus_constant (save_area,
- ix86_varargs_gpr_size + 127)));
- mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
- MEM_NOTRAP_P (mem) = 1;
- set_mem_alias_set (mem, set);
- set_mem_align (mem, 64);
+ /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
+ we used movdqa (i.e. TImode) instead? Perhaps even better would
+ be if we could determine the real mode of the data, via a hook
+ into pass_stdarg. Ignore all that for now. */
+ smode = V4SFmode;
+ if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
+ crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
+
+ max = cum->sse_regno + cfun->va_list_fpr_size / 16;
+ if (max > X86_64_SSE_REGPARM_MAX)
+ max = X86_64_SSE_REGPARM_MAX;
+
+ for (i = cum->sse_regno; i < max; ++i)
+ {
+ mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
+ mem = gen_rtx_MEM (smode, mem);
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
- /* And finally do the dirty job! */
- emit_insn (gen_sse_prologue_save (mem, nsse_reg,
- GEN_INT (cum->sse_regno), label,
- gen_reg_rtx (Pmode)));
+ emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
+ }
+
+ emit_label (label);
}
}
For stdargs, we do want to skip the last named argument. */
next_cum = *cum;
if (stdarg_p (fntype))
- function_arg_advance (&next_cum, mode, type, 1);
+ ix86_function_arg_advance (&next_cum, mode, type, true);
if (cum->call_abi == MS_ABI)
setup_incoming_varargs_ms_64 (&next_cum);
}
f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
- f_fpr = TREE_CHAIN (f_gpr);
- f_ovf = TREE_CHAIN (f_fpr);
- f_sav = TREE_CHAIN (f_ovf);
-
- valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
- gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
- fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
- ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
- sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
+ f_fpr = DECL_CHAIN (f_gpr);
+ f_ovf = DECL_CHAIN (f_fpr);
+ f_sav = DECL_CHAIN (f_ovf);
+
+ valist = build_simple_mem_ref (valist);
+ TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
+ /* The following should be folded into the MEM_REF offset. */
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
+ f_gpr, NULL_TREE);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
+ f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
+ f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
+ f_sav, NULL_TREE);
/* Count number of gp and fp argument registers used. */
words = crtl->args.info.words;
return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
- f_fpr = TREE_CHAIN (f_gpr);
- f_ovf = TREE_CHAIN (f_fpr);
- f_sav = TREE_CHAIN (f_ovf);
+ f_fpr = DECL_CHAIN (f_gpr);
+ f_ovf = DECL_CHAIN (f_fpr);
+ f_sav = DECL_CHAIN (f_ovf);
gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
}
if (need_temp)
{
- int i;
+ int i, prev_size = 0;
tree temp = create_tmp_var (type, "va_arg_tmp");
/* addr = &temp; */
rtx slot = XVECEXP (container, 0, i);
rtx reg = XEXP (slot, 0);
enum machine_mode mode = GET_MODE (reg);
- tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
- tree addr_type = build_pointer_type (piece_type);
- tree daddr_type = build_pointer_type_for_mode (piece_type,
- ptr_mode, true);
+ tree piece_type;
+ tree addr_type;
+ tree daddr_type;
tree src_addr, src;
int src_offset;
tree dest_addr, dest;
+ int cur_size = GET_MODE_SIZE (mode);
+
+ if (prev_size + cur_size > size)
+ {
+ cur_size = size - prev_size;
+ mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
+ if (mode == BLKmode)
+ mode = QImode;
+ }
+ piece_type = lang_hooks.types.type_for_mode (mode, 1);
+ if (mode == GET_MODE (reg))
+ addr_type = build_pointer_type (piece_type);
+ else
+ addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
+ true);
+ daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
+ true);
if (SSE_REGNO_P (REGNO (reg)))
{
src_addr = fold_convert (addr_type, src_addr);
src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
size_int (src_offset));
- src = build_va_arg_indirect_ref (src_addr);
dest_addr = fold_convert (daddr_type, addr);
dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
size_int (INTVAL (XEXP (slot, 1))));
- dest = build_va_arg_indirect_ref (dest_addr);
+ if (cur_size == GET_MODE_SIZE (mode))
+ {
+ src = build_va_arg_indirect_ref (src_addr);
+ dest = build_va_arg_indirect_ref (dest_addr);
- gimplify_assign (dest, src, pre_p);
+ gimplify_assign (dest, src, pre_p);
+ }
+ else
+ {
+ tree copy
+ = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
+ 3, dest_addr, src_addr,
+ size_int (cur_size));
+ gimplify_and_add (copy, pre_p);
+ }
+ prev_size += cur_size;
}
}
arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
/* Care for on-stack alignment if needed. */
- if (arg_boundary <= 64
- || integer_zerop (TYPE_SIZE (type)))
+ if (arg_boundary <= 64 || size == 0)
t = ovf;
else
{
t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
size_int (-align));
t = fold_convert (TREE_TYPE (ovf), t);
- if (crtl->stack_alignment_needed < arg_boundary)
- crtl->stack_alignment_needed = arg_boundary;
}
+
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
gimplify_assign (addr, t, pre_p);
return build_va_arg_indirect_ref (addr);
}
\f
-/* Return nonzero if OPNUM's MEM should be matched
+/* Return true if OPNUM's MEM should be matched
in movabs* patterns. */
-int
+bool
ix86_check_movabs (rtx insn, int opnum)
{
rtx set, mem;
while (GET_CODE (mem) == SUBREG)
mem = SUBREG_REG (mem);
gcc_assert (MEM_P (mem));
- return (volatile_ok || !MEM_VOLATILE_P (mem));
+ return volatile_ok || !MEM_VOLATILE_P (mem);
}
\f
/* Initialize the table of extra 80387 mathematical constants. */
ext_80387_constants_init = 1;
}
-/* Return true if the constant is something that can be loaded with
- a special instruction. */
+/* Return non-zero if the constant is something that
+ can be loaded with a special instruction. */
int
standard_80387_constant_p (rtx x)
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
else
- return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
+ return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
case MODE_TI:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
gcc_unreachable ();
}
-/* Returns 1 if OP contains a symbol reference */
+/* Returns true if OP contains a symbol reference */
-int
+bool
symbolic_reference_mentioned_p (rtx op)
{
const char *fmt;
int i;
if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
- return 1;
+ return true;
fmt = GET_RTX_FORMAT (GET_CODE (op));
for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
for (j = XVECLEN (op, i) - 1; j >= 0; j--)
if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
- return 1;
+ return true;
}
else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
- return 1;
+ return true;
}
- return 0;
+ return false;
}
-/* Return 1 if it is appropriate to emit `ret' instructions in the
+/* Return true if it is appropriate to emit `ret' instructions in the
body of a function. Do this only if the epilogue is simple, needing a
couple of insns. Prior to reloading, we can't tell how many registers
- must be saved, so return 0 then. Return 0 if there is no frame
+ must be saved, so return false then. Return false if there is no frame
marker to de-allocate. */
-int
+bool
ix86_can_use_return_insn_p (void)
{
struct ix86_frame frame;
if (! reload_completed || frame_pointer_needed)
- return 0;
+ return false;
- /* Don't allow more than 32 pop, since that's all we can do
+ /* Don't allow more than 32k pop, since that's all we can do
with one instruction. */
- if (crtl->args.pops_args
- && crtl->args.size >= 32768)
+ if (crtl->args.pops_args && crtl->args.size >= 32768)
- return 0;
+ return false;
ix86_compute_frame_layout (&frame);
- return frame.to_allocate == 0 && frame.padding0 == 0
- && (frame.nregs + frame.nsseregs) == 0;
+ /* The frame must end right after the return address (the layout
+ starts counting at UNITS_PER_WORD for it) and no integer or SSE
+ register may have been saved. */
+ return (frame.stack_pointer_offset == UNITS_PER_WORD
+ && (frame.nregs + frame.nsseregs) == 0);
}
\f
/* Value should be nonzero if functions must have frame pointers.
|| ix86_current_function_calls_tls_descriptor))
return true;
- if (crtl->profile)
+ if (crtl->profile && !flag_fentry)
return true;
return false;
assemble_name (asm_out_file, name);
fputs ("\n\t.private_extern\t", asm_out_file);
assemble_name (asm_out_file, name);
- fputs ("\n", asm_out_file);
+ putc ('\n', asm_out_file);
ASM_OUTPUT_LABEL (asm_out_file, name);
DECL_WEAK (decl) = 1;
}
{
DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
- (*targetm.asm_out.unique_section) (decl, 0);
+ targetm.asm_out.unique_section (decl, 0);
switch_to_section (get_named_section (decl, NULL, 0));
- (*targetm.asm_out.globalize_label) (asm_out_file, name);
+ targetm.asm_out.globalize_label (asm_out_file, name);
fputs ("\t.hidden\t", asm_out_file);
assemble_name (asm_out_file, name);
putc ('\n', asm_out_file);
ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
#endif
- (*targetm.asm_out.internal_label) (asm_out_file, "L",
- CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
if (flag_pic)
{
/* Ensure all queued register saves are flushed before the
call. */
if (dwarf2out_do_frame ())
- {
- rtx insn;
- start_sequence ();
- insn = emit_barrier ();
- end_sequence ();
- dwarf2out_frame_debug (insn, false);
- }
+ dwarf2out_flush_queued_reg_saves ();
#endif
xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
xops[2] = gen_rtx_MEM (QImode, xops[2]);
static rtx
gen_push (rtx arg)
{
- if (ix86_cfa_state->reg == stack_pointer_rtx)
- ix86_cfa_state->offset += UNITS_PER_WORD;
+ struct machine_function *m = cfun->machine;
+
+ if (m->fs.cfa_reg == stack_pointer_rtx)
+ m->fs.cfa_offset += UNITS_PER_WORD;
+ m->fs.sp_offset += UNITS_PER_WORD;
return gen_rtx_SET (VOIDmode,
gen_rtx_MEM (Pmode,
arg);
}
-/* Return >= 0 if there is an unused call-clobbered register available
- for the entire function. */
+/* Generate a "pop" pattern for input ARG. */
-static unsigned int
-ix86_select_alt_pic_regnum (void)
+static rtx
+gen_pop (rtx arg)
{
- if (current_function_is_leaf && !crtl->profile
- && !ix86_current_function_calls_tls_descriptor)
+ return gen_rtx_SET (VOIDmode,
+ arg,
+ gen_rtx_MEM (Pmode,
+ gen_rtx_POST_INC (Pmode,
+ stack_pointer_rtx)));
+}
+
+/* Return >= 0 if there is an unused call-clobbered register available
+ for the entire function. */
+
+static unsigned int
+ix86_select_alt_pic_regnum (void)
+{
+ if (current_function_is_leaf
+ && !crtl->profile
+ && !ix86_current_function_calls_tls_descriptor)
{
int i, drap;
/* Can't use the same register for both PIC and DRAP. */
HOST_WIDE_INT offset;
unsigned int preferred_alignment;
HOST_WIDE_INT size = get_frame_size ();
+ HOST_WIDE_INT to_allocate;
frame->nregs = ix86_nsaved_regs ();
frame->nsseregs = ix86_nsaved_sseregs ();
preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
/* MS ABI seem to require stack alignment to be always 16 except for function
- prologues. */
- if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
+ prologues and leaf. */
+ if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
+ && (!current_function_is_leaf || cfun->calls_alloca != 0
+ || ix86_current_function_calls_tls_descriptor))
{
preferred_alignment = 16;
stack_alignment_needed = 16;
else
frame->save_regs_using_mov = false;
+ /* If static stack checking is enabled and done with probes, the registers
+ need to be saved before allocating the frame. */
+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+ frame->save_regs_using_mov = false;
+
/* Skip return address. */
offset = UNITS_PER_WORD;
frame->hard_frame_pointer_offset = offset;
- /* Set offset to aligned because the realigned frame starts from
- here. */
- if (stack_realign_fp)
- offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
-
/* Register save area */
offset += frame->nregs * UNITS_PER_WORD;
+ frame->reg_save_offset = offset;
- /* Align SSE reg save area. */
+ /* Align and set SSE register save area. */
if (frame->nsseregs)
- frame->padding0 = ((offset + 16 - 1) & -16) - offset;
- else
- frame->padding0 = 0;
-
- /* SSE register save area. */
- offset += frame->padding0 + frame->nsseregs * 16;
+ {
+ /* The only ABI that has saved SSE registers (Win64) also has a
+ 16-byte aligned default stack, and thus we don't need to be
+ within the re-aligned local stack frame to save them. */
+ gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
+ offset = (offset + 16 - 1) & -16;
+ offset += frame->nsseregs * 16;
+ }
+ frame->sse_reg_save_offset = offset;
+
+ /* The re-aligned stack starts here. Values before this point are not
+ directly comparable with values below this point. In order to make
+ sure that no value happens to be the same before and after, force
+ the alignment computation below to add a non-zero value. */
+ if (stack_realign_fp)
+ offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
/* Va-arg area */
frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
offset += frame->va_arg_size;
/* Align start of frame for local function. */
- frame->padding1 = ((offset + stack_alignment_needed - 1)
- & -stack_alignment_needed) - offset;
-
- offset += frame->padding1;
+ offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
/* Frame pointer points here. */
frame->frame_pointer_offset = offset;
or using alloca. */
if (!current_function_is_leaf || cfun->calls_alloca
|| ix86_current_function_calls_tls_descriptor)
- frame->padding2 = ((offset + preferred_alignment - 1)
- & -preferred_alignment) - offset;
- else
- frame->padding2 = 0;
-
- offset += frame->padding2;
+ offset = (offset + preferred_alignment - 1) & -preferred_alignment;
/* We've reached end of stack frame. */
frame->stack_pointer_offset = offset;
/* Size prologue needs to allocate. */
- frame->to_allocate =
- (size + frame->padding1 + frame->padding2
- + frame->outgoing_arguments_size + frame->va_arg_size);
+ to_allocate = offset - frame->sse_reg_save_offset;
- if ((!frame->to_allocate && frame->nregs <= 1)
- || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
+ if ((!to_allocate && frame->nregs <= 1)
+ || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
frame->save_regs_using_mov = false;
- if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
+ if (ix86_using_red_zone ()
&& current_function_sp_is_unchanging
&& current_function_is_leaf
&& !ix86_current_function_calls_tls_descriptor)
{
- frame->red_zone_size = frame->to_allocate;
+ frame->red_zone_size = to_allocate;
if (frame->save_regs_using_mov)
frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
}
else
frame->red_zone_size = 0;
- frame->to_allocate -= frame->red_zone_size;
frame->stack_pointer_offset -= frame->red_zone_size;
}
+/* Return the encoding length of the address REGNO + OFFSET.  This is
+   a cut-down memory_address_length: the address is always reg+offset
+   here, so the length can be computed without building and then
+   discarding any rtl.  */
+
+static inline int
+choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
+{
+  int len;
+
+  if (offset == 0)
+    /* EBP and R13 cannot be encoded without an offset.  */
+    len = (regno == BP_REG || regno == R13_REG) ? 1 : 0;
+  else if (IN_RANGE (offset, -128, 127))
+    len = 1;
+  else
+    len = 4;
+
+  /* ESP and R12 must be encoded with a SIB byte.  */
+  if (regno == SP_REG || regno == R12_REG)
+    len += 1;
+
+  return len;
+}
+
+/* Return an RTX that points to CFA_OFFSET within the stack frame,
+   expressed as an offset from one of the base registers that
+   CFUN->MACHINE->FS currently marks as valid.  */
+
+static rtx
+choose_baseaddr (HOST_WIDE_INT cfa_offset)
+{
+  const struct machine_function *m = cfun->machine;
+  rtx base_reg = NULL;
+  HOST_WIDE_INT base_offset = 0;
+
+  if (!m->use_fast_prologue_epilogue)
+    {
+      int best_len = 16;
+
+      /* Choose the base register with the smallest address encoding.
+	 Candidates are tried in the order SP, DRAP, FP and a later one
+	 replaces an earlier one on a tie, so ties go FP > DRAP > SP.  */
+      if (m->fs.sp_valid)
+	{
+	  base_reg = stack_pointer_rtx;
+	  base_offset = m->fs.sp_offset - cfa_offset;
+	  best_len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
+	}
+      if (m->fs.drap_valid)
+	{
+	  HOST_WIDE_INT cand_offset = 0 - cfa_offset;
+	  int cand_len
+	    = choose_baseaddr_len (REGNO (crtl->drap_reg), cand_offset);
+
+	  if (cand_len <= best_len)
+	    {
+	      base_reg = crtl->drap_reg;
+	      base_offset = cand_offset;
+	      best_len = cand_len;
+	    }
+	}
+      if (m->fs.fp_valid)
+	{
+	  HOST_WIDE_INT cand_offset = m->fs.fp_offset - cfa_offset;
+	  int cand_len
+	    = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, cand_offset);
+
+	  if (cand_len <= best_len)
+	    {
+	      base_reg = hard_frame_pointer_rtx;
+	      base_offset = cand_offset;
+	    }
+	}
+    }
+  /* Otherwise choose the base register most likely to allow the most
+     scheduling opportunities.  Generally FP is valid throughout the
+     function, while DRAP must be reloaded within the epilogue.  But
+     choose either over the SP due to increased encoding size.  */
+  else if (m->fs.fp_valid)
+    {
+      base_reg = hard_frame_pointer_rtx;
+      base_offset = m->fs.fp_offset - cfa_offset;
+    }
+  else if (m->fs.drap_valid)
+    {
+      base_reg = crtl->drap_reg;
+      base_offset = 0 - cfa_offset;
+    }
+  else if (m->fs.sp_valid)
+    {
+      base_reg = stack_pointer_rtx;
+      base_offset = m->fs.sp_offset - cfa_offset;
+    }
+
+  gcc_assert (base_reg != NULL);
+
+  return plus_constant (base_reg, base_offset);
+}
+
/* Emit code to save registers in the prologue. */
static void
}
}
-/* Emit code to save registers using MOV insns. First register
- is restored from POINTER + OFFSET. */
+/* Emit a single save of register REGNO, in MODE, to the stack slot at
+   CFA - CFA_OFFSET, and attach whatever unwind note the save needs.  */
+
+static void
+ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
+			      HOST_WIDE_INT cfa_offset)
+{
+  struct machine_function *m = cfun->machine;
+  rtx reg = gen_rtx_REG (mode, regno);
+  rtx slot_addr = choose_baseaddr (cfa_offset);
+  rtx slot = gen_frame_mem (mode, slot_addr);
+  rtx base_reg, insn;
+
+  /* For SSE saves, we need to indicate the 128-bit alignment.  */
+  set_mem_align (slot, GET_MODE_ALIGNMENT (mode));
+
+  insn = emit_move_insn (slot, reg);
+  RTX_FRAME_RELATED_P (insn) = 1;
+
+  /* Look through a PLUS to find the register the slot is based on.  */
+  base_reg = GET_CODE (slot_addr) == PLUS ? XEXP (slot_addr, 0) : slot_addr;
+  gcc_checking_assert (REG_P (base_reg));
+
+  if (m->fs.realigned)
+    {
+      /* When saving registers into a re-aligned local stack frame,
+	 avoid any tricky guessing by dwarf2out.  */
+      bool saving_drap;
+      rtx fp_slot;
+
+      gcc_checking_assert (stack_realign_drap);
+      saving_drap = regno == REGNO (crtl->drap_reg);
+
+      /* The frame pointer is a stable reference within the aligned
+	 frame, so both notes below describe the slot relative to it.  */
+      gcc_checking_assert (m->fs.fp_valid);
+      fp_slot = gen_rtx_MEM (mode,
+			     plus_constant (hard_frame_pointer_rtx,
+					    m->fs.fp_offset - cfa_offset));
+
+      if (saving_drap)
+	/* A bit of a hack.  We force the DRAP register to be saved in
+	   the re-aligned stack frame, which provides us with a copy
+	   of the CFA that will last past the prologue.  Install it.  */
+	add_reg_note (insn, REG_CFA_DEF_CFA, fp_slot);
+      else
+	add_reg_note (insn, REG_CFA_EXPRESSION,
+		      gen_rtx_SET (VOIDmode, fp_slot, reg));
+    }
+  else if (base_reg != m->fs.cfa_reg)
+    {
+      /* The memory may not be relative to the current CFA register,
+	 which means that we may need to generate a new pattern for
+	 use by the unwind info.  */
+      rtx cfa_slot
+	= gen_rtx_MEM (mode,
+		       plus_constant (m->fs.cfa_reg,
+				      m->fs.cfa_offset - cfa_offset));
+
+      add_reg_note (insn, REG_CFA_OFFSET,
+		    gen_rtx_SET (VOIDmode, cfa_slot, reg));
+    }
+}
+
+/* Emit code to save registers using MOV insns.
+ First register is stored at CFA - CFA_OFFSET.
+ Only non-SSE hard registers for which ix86_save_reg (regno, true)
+ holds are saved, each in Pmode via ix86_emit_save_reg_using_mov;
+ CFA_OFFSET is reduced by UNITS_PER_WORD after every save. */
static void
-ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
+ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
unsigned int regno;
- rtx insn;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
{
- insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
- Pmode, offset),
- gen_rtx_REG (Pmode, regno));
- RTX_FRAME_RELATED_P (insn) = 1;
- offset += UNITS_PER_WORD;
+ ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
+ cfa_offset -= UNITS_PER_WORD;
}
}
-/* Emit code to save registers using MOV insns. First register
- is restored from POINTER + OFFSET. */
+/* Emit code to save SSE registers using MOV insns.
+ First register is stored at CFA - CFA_OFFSET.
+ Only SSE hard registers for which ix86_save_reg (regno, true) holds
+ are saved, each in V4SFmode via ix86_emit_save_reg_using_mov;
+ CFA_OFFSET is reduced by 16 after every save. */
static void
-ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
+ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
unsigned int regno;
- rtx insn;
- rtx mem;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
{
- mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
- set_mem_align (mem, 128);
- insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
- RTX_FRAME_RELATED_P (insn) = 1;
- offset += 16;
+ ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
+ cfa_offset -= 16;
}
}
static GTY(()) rtx queued_cfa_restores;
/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
- manipulation insn. Don't add it if the previously
- saved value will be left untouched within stack red-zone till return,
- as unwinders can find the same value in the register and
- on the stack. */
+ manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
+ Don't add the note if the previously saved value will be left untouched
+ within stack red-zone till return, as unwinders can find the same value
+ in the register and on the stack. */
static void
-ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
+ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
- if (TARGET_RED_ZONE
- && !TARGET_64BIT_MS_ABI
- && red_offset + RED_ZONE_SIZE >= 0
- && crtl->args.pops_args < 65536)
+ if (cfa_offset <= cfun->machine->fs.red_zone_offset)
return;
if (insn)
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
int style, bool set_cfa)
{
+ struct machine_function *m = cfun->machine;
rtx insn;
if (! TARGET_64BIT)
- insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset));
else if (x86_64_immediate_operand (offset, DImode))
- insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset));
else
{
- rtx r11;
+ rtx tmp;
/* r11 is used by indirect sibcall return as well, set before the
- epilogue and used after the epilogue. ATM indirect sibcall
- shouldn't be used together with huge frame sizes in one
- function because of the frame_size check in sibcall.c. */
- gcc_assert (style);
- r11 = gen_rtx_REG (DImode, R11_REG);
- insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
+ epilogue and used after the epilogue. */
+ if (style)
+ tmp = gen_rtx_REG (DImode, R11_REG);
+ else
+ {
+ gcc_assert (src != hard_frame_pointer_rtx
+ && dest != hard_frame_pointer_rtx);
+ tmp = hard_frame_pointer_rtx;
+ }
+ insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
if (style < 0)
RTX_FRAME_RELATED_P (insn) = 1;
- insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
- offset));
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp,
+ offset));
}
if (style >= 0)
{
rtx r;
- gcc_assert (ix86_cfa_state->reg == src);
- ix86_cfa_state->offset += INTVAL (offset);
- ix86_cfa_state->reg = dest;
-
+ gcc_assert (m->fs.cfa_reg == src);
+ m->fs.cfa_offset += INTVAL (offset);
+ m->fs.cfa_reg = dest;
+
r = gen_rtx_PLUS (Pmode, src, offset);
r = gen_rtx_SET (VOIDmode, dest, r);
add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
}
else if (style < 0)
RTX_FRAME_RELATED_P (insn) = 1;
+
+ if (dest == stack_pointer_rtx)
+ {
+ HOST_WIDE_INT ooffset = m->fs.sp_offset;
+ bool valid = m->fs.sp_valid;
+
+ if (src == hard_frame_pointer_rtx)
+ {
+ valid = m->fs.fp_valid;
+ ooffset = m->fs.fp_offset;
+ }
+ else if (src == crtl->drap_reg)
+ {
+ valid = m->fs.drap_valid;
+ ooffset = 0;
+ }
+ else
+ {
+ /* Else there are two possibilities: SP itself, which we set
+ up as the default above, or EH_RETURN_STACKADJ_RTX, which is
+ taken care of by hand along the eh_return path. */
+ gcc_checking_assert (src == stack_pointer_rtx
+ || offset == const0_rtx);
+ }
+
+ m->fs.sp_offset = ooffset - INTVAL (offset);
+ m->fs.sp_valid = valid;
+ }
}
/* Find an available register to be used as dynamic realign argument
Return: the regno of chosen register. */
-static unsigned int
+static unsigned int
find_drap_reg (void)
{
tree decl = cfun->decl;
register in such case. */
if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
return DI_REG;
-
+
/* Reuse static chain register if it isn't used for parameter
passing. */
if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
if (ix86_user_incoming_stack_boundary)
incoming_stack_boundary = ix86_user_incoming_stack_boundary;
/* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
- if -mstackrealign is used, it isn't used for sibcall check and
+ if -mstackrealign is used, it isn't used for sibcall check and
estimated stack alignment is 128bit. */
else if (!sibcall
&& !TARGET_64BIT
drap_vreg = copy_to_reg (arg_ptr);
seq = get_insns ();
end_sequence ();
-
+
insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
if (!optimize)
{
return virtual_incoming_args_rtx;
}
+/* Describes the scratch register handed out by
+ get_scratch_register_on_entry and given back by
+ release_scratch_register_on_entry. */
+struct scratch_reg {
+ rtx reg; /* The scratch register, as a Pmode REG. */
+ bool saved; /* True if REG was pushed when obtained and so must be
+ popped again when released. */
+};
+
+/* Fill in *SR with a short-lived scratch register for use on function
+   entry.  In 32-bit mode, it is valid only after the registers are
+   saved in the prologue.  Once the register is dead it must be given
+   back with release_scratch_register_on_entry.  */
+
+static void
+get_scratch_register_on_entry (struct scratch_reg *sr)
+{
+  int regno;
+
+  sr->saved = false;
+
+  if (!TARGET_64BIT)
+    {
+      tree fndecl = current_function_decl;
+      tree fntype = TREE_TYPE (fndecl);
+      bool is_fastcall
+	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
+      bool has_static_chain = DECL_STATIC_CHAIN (fndecl);
+      int regparm = ix86_function_regparm (fntype, fndecl);
+      int drap
+	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
+
+      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
+	 for the static chain register.  */
+      if ((regparm < 1 || (is_fastcall && !has_static_chain))
+	  && drap != AX_REG)
+	regno = AX_REG;
+      else if (regparm < 2 && drap != DX_REG)
+	regno = DX_REG;
+      /* ecx is the static chain register.  */
+      else if (regparm < 3 && !is_fastcall && !has_static_chain
+	       && drap != CX_REG)
+	regno = CX_REG;
+      else if (ix86_save_reg (BX_REG, true))
+	regno = BX_REG;
+      /* esi is the static chain register.  */
+      else if (!(regparm == 3 && has_static_chain)
+	       && ix86_save_reg (SI_REG, true))
+	regno = SI_REG;
+      else if (ix86_save_reg (DI_REG, true))
+	regno = DI_REG;
+      else
+	{
+	  /* Nothing is free: take eax (or edx when eax is the DRAP
+	     register) and remember that it has to be preserved.  */
+	  regno = (drap == AX_REG ? DX_REG : AX_REG);
+	  sr->saved = true;
+	}
+    }
+  else
+    /* We always use R11 in 64-bit mode.  */
+    regno = R11_REG;
+
+  sr->reg = gen_rtx_REG (Pmode, regno);
+  if (!sr->saved)
+    return;
+
+  /* Preserve the register's current value with a frame-related push.  */
+  {
+    rtx push = emit_insn (gen_push (sr->reg));
+    RTX_FRAME_RELATED_P (push) = 1;
+  }
+}
+
+/* Give back the scratch register in *SR that was obtained from
+   get_scratch_register_on_entry.  Nothing is emitted unless the
+   register was pushed when it was obtained.  */
+
+static void
+release_scratch_register_on_entry (struct scratch_reg *sr)
+{
+  rtx pop;
+
+  if (!sr->saved)
+    return;
+
+  pop = emit_insn (gen_pop (sr->reg));
+
+  /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop, so
+     describe the stack pointer adjustment explicitly.  */
+  RTX_FRAME_RELATED_P (pop) = 1;
+  add_reg_note (pop, REG_FRAME_RELATED_EXPR,
+		gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+			     gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					   GEN_INT (UNITS_PER_WORD))));
+}
+
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+/* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
+
+static void
+ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
+{
+ /* We skip the probe for the first interval + a small dope of 4 words and
+ probe that many bytes past the specified size to maintain a protection
+ area at the bottom of the stack. */
+ const int dope = 4 * UNITS_PER_WORD;
+ rtx size_rtx = GEN_INT (size);
+
+ /* See if we have a constant small number of probes to generate. If so,
+ that's the easy case. The run-time loop is made up of 11 insns in the
+ generic case while the compile-time loop is made up of 3+2*(n-1) insns
+ for n # of intervals. */
+ if (size <= 5 * PROBE_INTERVAL)
+ {
+ HOST_WIDE_INT i, adjust;
+ bool first_probe = true;
+
+ /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
+ values of N from 1 until it exceeds SIZE. If only one probe is
+ needed, this will not generate any code. Then adjust and probe
+ to PROBE_INTERVAL + SIZE. */
+ for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
+ {
+ if (first_probe)
+ {
+ adjust = 2 * PROBE_INTERVAL + dope;
+ first_probe = false;
+ }
+ else
+ adjust = PROBE_INTERVAL;
+
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -adjust)));
+ emit_stack_probe (stack_pointer_rtx);
+ }
+
+ if (first_probe)
+ adjust = size + PROBE_INTERVAL + dope;
+ else
+ adjust = size + PROBE_INTERVAL - i;
+
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -adjust)));
+ emit_stack_probe (stack_pointer_rtx);
+
+ /* Adjust back to account for the additional first interval. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ PROBE_INTERVAL + dope)));
+ }
+
+ /* Otherwise, do the same as above, but in a loop. Note that we must be
+ extra careful with variables wrapping around because we might be at
+ the very top (or the very bottom) of the address space and we have
+ to be able to handle this case properly; in particular, we use an
+ equality test for the loop condition. */
+ else
+ {
+ HOST_WIDE_INT rounded_size;
+ struct scratch_reg sr;
+
+ get_scratch_register_on_entry (&sr);
+
+
+ /* Step 1: round SIZE to the previous multiple of the interval. */
+
+ rounded_size = size & -PROBE_INTERVAL;
+
+
+ /* Step 2: compute initial and final value of the loop counter. */
+
+ /* SP = SP_0 + PROBE_INTERVAL. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ - (PROBE_INTERVAL + dope))));
+
+ /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
+ emit_move_insn (sr.reg, GEN_INT (-rounded_size));
+ emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
+ gen_rtx_PLUS (Pmode, sr.reg,
+ stack_pointer_rtx)));
+
+
+ /* Step 3: the loop
+
+ while (SP != LAST_ADDR)
+ {
+ SP = SP + PROBE_INTERVAL
+ probe at SP
+ }
+
+ adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
+ values of N from 1 until it is equal to ROUNDED_SIZE. */
+
+ emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
+
+
+ /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
+ assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
+
+ if (size != rounded_size)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ rounded_size - size)));
+ emit_stack_probe (stack_pointer_rtx);
+ }
+
+ /* Adjust back to account for the additional first interval. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ PROBE_INTERVAL + dope)));
+
+ release_scratch_register_on_entry (&sr);
+ }
+
+ gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
+ cfun->machine->fs.sp_offset += size;
+
+ /* Make sure nothing is scheduled before we are done. */
+ emit_insn (gen_blockage ());
+}
+
+/* Adjust the stack pointer up to REG while probing it.
+ This writes the assembly text of the run-time loop to asm_out_file:
+ compare SP with REG (the final address), leave the loop when they are
+ equal, otherwise move SP by PROBE_INTERVAL, probe the new SP and repeat.
+ The two labels are numbered from a function-local static counter.
+ Always returns the empty string.
+
+ NOTE(review): output_probe_stack_range uses the same "LPSRL"/"LPSRE"
+ label prefixes with its own counter -- confirm both loops can never be
+ emitted into the same assembly file. */
+
+const char *
+output_adjust_stack_and_probe (rtx reg)
+{
+ static int labelno = 0;
+ char loop_lab[32], end_lab[32];
+ rtx xops[2];
+
+ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
+ ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
+
+ /* Jump to END_LAB if SP == LAST_ADDR. LAST_ADDR is the value held in
+ REG. */
+ xops[0] = stack_pointer_rtx;
+ xops[1] = reg;
+ output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
+ fputs ("\tje\t", asm_out_file);
+ assemble_name_raw (asm_out_file, end_lab);
+ fputc ('\n', asm_out_file);
+
+ /* SP = SP + PROBE_INTERVAL. The insn emitted is a subtraction of
+ PROBE_INTERVAL from the stack pointer. */
+ xops[1] = GEN_INT (PROBE_INTERVAL);
+ output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
+
+ /* Probe at SP, by or-ing the constant zero into the memory it points
+ to. */
+ xops[1] = const0_rtx;
+ output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
+
+ fprintf (asm_out_file, "\tjmp\t");
+ assemble_name_raw (asm_out_file, loop_lab);
+ fputc ('\n', asm_out_file);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
+
+ return "";
+}
+
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+ inclusive. These are offsets from the current stack pointer; the
+ addresses actually probed are formed by subtracting them from SP.
+ FIRST and SIZE are compile-time constants. Sizes of up to 7 probe
+ intervals are handled with an unrolled sequence of probes; larger sizes
+ use a run-time loop that needs a scratch register. No stack pointer
+ adjustment is emitted here, other than whatever acquiring and releasing
+ the scratch register does. A blockage is emitted at the end. */
+
+static void
+ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+{
+ /* See if we have a constant small number of probes to generate. If so,
+ that's the easy case. The run-time loop is made up of 7 insns in the
+ generic case while the compile-time loop is made up of n insns for n #
+ of intervals. */
+ if (size <= 7 * PROBE_INTERVAL)
+ {
+ HOST_WIDE_INT i;
+
+ /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
+ it exceeds SIZE. If only one probe is needed, this will not
+ generate any code. Then probe at FIRST + SIZE. */
+ for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
+ emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
+
+ emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
+ }
+
+ /* Otherwise, do the same as above, but in a loop. Note that we must be
+ extra careful with variables wrapping around because we might be at
+ the very top (or the very bottom) of the address space and we have
+ to be able to handle this case properly; in particular, we use an
+ equality test for the loop condition. */
+ else
+ {
+ HOST_WIDE_INT rounded_size, last;
+ struct scratch_reg sr;
+
+ get_scratch_register_on_entry (&sr);
+
+
+ /* Step 1: round SIZE to the previous multiple of the interval. */
+
+ rounded_size = size & -PROBE_INTERVAL;
+
+
+ /* Step 2: compute initial and final value of the loop counter. */
+
+ /* TEST_OFFSET = FIRST. The scratch register holds the offset
+ negated, so that adding it to SP yields the address to probe. */
+ emit_move_insn (sr.reg, GEN_INT (-first));
+
+ /* LAST_OFFSET = FIRST + ROUNDED_SIZE. It is handed to the loop insn
+ negated as well, to match the scratch register. */
+ last = first + rounded_size;
+
+
+ /* Step 3: the loop
+
+ while (TEST_ADDR != LAST_ADDR)
+ {
+ TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
+ probe at TEST_ADDR
+ }
+
+ probes at FIRST + N * PROBE_INTERVAL for values of N from 1
+ until it is equal to ROUNDED_SIZE. */
+
+ emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
+
+
+ /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
+ that SIZE is equal to ROUNDED_SIZE. The address is SP plus the
+ scratch register plus the (negative) remainder. */
+
+ if (size != rounded_size)
+ emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ sr.reg),
+ rounded_size - size));
+
+ release_scratch_register_on_entry (&sr);
+ }
+
+ /* Make sure nothing is scheduled before we are done. */
+ emit_insn (gen_blockage ());
+}
+
+/* Probe a range of stack addresses from REG to END, inclusive. These are
+ offsets from the current stack pointer.
+ This writes the assembly text of the run-time loop to asm_out_file:
+ compare REG with END, leave the loop when they are equal, otherwise
+ step REG by PROBE_INTERVAL, probe the memory at SP + REG and repeat.
+ The two labels are numbered from a function-local static counter.
+ Always returns the empty string.
+
+ NOTE(review): output_adjust_stack_and_probe uses the same
+ "LPSRL"/"LPSRE" label prefixes with its own counter -- confirm both
+ loops can never be emitted into the same assembly file. */
+
+const char *
+output_probe_stack_range (rtx reg, rtx end)
+{
+ static int labelno = 0;
+ char loop_lab[32], end_lab[32];
+ rtx xops[3];
+
+ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
+ ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
+
+ /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. The comparison is made
+ between REG and END. */
+ xops[0] = reg;
+ xops[1] = end;
+ output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
+ fputs ("\tje\t", asm_out_file);
+ assemble_name_raw (asm_out_file, end_lab);
+ fputc ('\n', asm_out_file);
+
+ /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. The insn emitted is a
+ subtraction of PROBE_INTERVAL from REG. */
+ xops[1] = GEN_INT (PROBE_INTERVAL);
+ output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
+
+ /* Probe at TEST_ADDR, i.e. or the constant zero into the memory
+ addressed by the stack pointer plus REG. */
+ xops[0] = stack_pointer_rtx;
+ xops[1] = reg;
+ xops[2] = const0_rtx;
+ output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
+
+ fprintf (asm_out_file, "\tjmp\t");
+ assemble_name_raw (asm_out_file, loop_lab);
+ fputc ('\n', asm_out_file);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
+
+ return "";
+}
+
/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
to be generated in correct form. */
-static void
+static void
ix86_finalize_stack_realign_flags (void)
{
- /* Check if stack realign is really needed after reload, and
+ /* Check if stack realign is really needed after reload, and
stores result in cfun */
unsigned int incoming_stack_boundary
= (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
void
ix86_expand_prologue (void)
{
- rtx insn;
+ struct machine_function *m = cfun->machine;
+ rtx insn, t;
bool pic_reg_used;
struct ix86_frame frame;
HOST_WIDE_INT allocate;
- int gen_frame_pointer = frame_pointer_needed;
+ bool int_registers_saved;
ix86_finalize_stack_realign_flags ();
/* DRAP should not coexist with stack_realign_fp */
gcc_assert (!(crtl->drap_reg && stack_realign_fp));
+ memset (&m->fs, 0, sizeof (m->fs));
+
/* Initialize CFA state for before the prologue. */
- ix86_cfa_state->reg = stack_pointer_rtx;
- ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
+ m->fs.cfa_reg = stack_pointer_rtx;
+ m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
+
+ /* Track SP offset to the CFA. We continue tracking this after we've
+ swapped the CFA register away from SP. In the case of re-alignment
+ this is fudged; we're interested in offsets within the local frame. */
+ m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
+ m->fs.sp_valid = true;
ix86_compute_frame_layout (&frame);
- if (ix86_function_ms_hook_prologue (current_function_decl))
+ if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
{
- rtx push, mov;
+ /* We should have already generated an error for any use of
+ ms_hook on a nested function. */
+ gcc_checking_assert (!ix86_static_chain_on_stack);
- /* Make sure the function starts with
+ /* Check if profiling is active and we shall use profiling before
+ prologue variant. If so, report it with sorry (). */
+ if (crtl->profile && flag_fentry != 0)
+ sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
+
+ /* In ix86_asm_output_function_label we emitted:
8b ff movl.s %edi,%edi
55 push %ebp
8b ec movl.s %esp,%ebp
This matches the hookable function prologue in Win32 API
functions in Microsoft Windows XP Service Pack 2 and newer.
Wine uses this to enable Windows apps to hook the Win32 API
- functions provided by Wine. */
- insn = emit_insn (gen_vswapmov (gen_rtx_REG (SImode, DI_REG),
- gen_rtx_REG (SImode, DI_REG)));
- push = emit_insn (gen_push (hard_frame_pointer_rtx));
- mov = emit_insn (gen_vswapmov (hard_frame_pointer_rtx,
- stack_pointer_rtx));
-
- if (frame_pointer_needed && !(crtl->drap_reg
- && crtl->stack_realign_needed))
- {
- /* The push %ebp and movl.s %esp, %ebp already set up
- the frame pointer. No need to do this again. */
- gen_frame_pointer = 0;
+ functions provided by Wine.
+
+ What that means is that we've already set up the frame pointer. */
+
+ if (frame_pointer_needed
+ && !(crtl->drap_reg && crtl->stack_realign_needed))
+ {
+ rtx push, mov;
+
+ /* We've decided to use the frame pointer already set up.
+ Describe this to the unwinder by pretending that both
+ push and mov insns happen right here.
+
+ Putting the unwind info here at the end of the ms_hook
+ is done so that we can make absolutely certain we get
+ the required byte sequence at the start of the function,
+ rather than relying on an assembler that can produce
+ the exact encoding required.
+
+ However it does mean (in the unpatched case) that we have
+ a 1 insn window where the asynchronous unwind info is
+ incorrect. However, if we placed the unwind info at
+ its correct location we would have incorrect unwind info
+ in the patched case. Which is probably all moot since
+ I don't expect Wine generates dwarf2 unwind info for the
+ system libraries that use this feature. */
+
+ insn = emit_insn (gen_blockage ());
+
+ push = gen_push (hard_frame_pointer_rtx);
+ mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
+ stack_pointer_rtx);
RTX_FRAME_RELATED_P (push) = 1;
RTX_FRAME_RELATED_P (mov) = 1;
- if (ix86_cfa_state->reg == stack_pointer_rtx)
- ix86_cfa_state->reg = hard_frame_pointer_rtx;
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
+
+ /* Note that gen_push incremented m->fs.cfa_offset, even
+ though we didn't emit the push insn here. */
+ m->fs.cfa_reg = hard_frame_pointer_rtx;
+ m->fs.fp_offset = m->fs.cfa_offset;
+ m->fs.fp_valid = true;
}
else
- /* If the frame pointer is not needed, pop %ebp again. This
- could be optimized for cases where ebp needs to be backed up
- for some other reason. If stack realignment is needed, pop
- the base pointer again, align the stack, and later regenerate
- the frame pointer setup. The frame pointer generated by the
- hook prologue is not aligned, so it can't be used. */
- insn = emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
+ {
+ /* The frame pointer is not needed so pop %ebp again.
+ This leaves us with a pristine state. */
+ emit_insn (gen_pop (hard_frame_pointer_rtx));
+ }
}
/* The first insn of a function that accepts its static chain on the
stack is to push the register that would be filled in by a direct
call. This insn will be skipped by the trampoline. */
- if (ix86_static_chain_on_stack)
+ else if (ix86_static_chain_on_stack)
{
- rtx t;
-
insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
emit_insn (gen_blockage ());
/* Emit prologue code to adjust stack alignment and setup DRAP, in case
of DRAP is needed and stack realignment is really needed after reload */
- if (crtl->drap_reg && crtl->stack_realign_needed)
+ if (stack_realign_drap)
{
- rtx x, y;
int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
- int param_ptr_offset = UNITS_PER_WORD;
-
- if (ix86_static_chain_on_stack)
- param_ptr_offset += UNITS_PER_WORD;
- if (!call_used_regs[REGNO (crtl->drap_reg)])
- param_ptr_offset += UNITS_PER_WORD;
- gcc_assert (stack_realign_drap);
-
- /* Grab the argument pointer. */
- x = plus_constant (stack_pointer_rtx, param_ptr_offset);
- y = crtl->drap_reg;
-
- /* Only need to push parameter pointer reg if it is caller
- saved reg */
+ /* Only need to push parameter pointer reg if it is caller saved. */
if (!call_used_regs[REGNO (crtl->drap_reg)])
{
/* Push arg pointer reg */
- insn = emit_insn (gen_push (y));
+ insn = emit_insn (gen_push (crtl->drap_reg));
RTX_FRAME_RELATED_P (insn) = 1;
}
- insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
- RTX_FRAME_RELATED_P (insn) = 1;
- ix86_cfa_state->reg = crtl->drap_reg;
+ /* Grab the argument pointer. */
+ t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
+ insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ m->fs.cfa_reg = crtl->drap_reg;
+ m->fs.cfa_offset = 0;
/* Align the stack. */
- insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
- stack_pointer_rtx,
- GEN_INT (-align_bytes)));
+ insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-align_bytes)));
RTX_FRAME_RELATED_P (insn) = 1;
/* Replicate the return address on the stack so that return
address can be reached via (argp - 1) slot. This is needed
to implement macro RETURN_ADDR_RTX and intrinsic function
expand_builtin_return_addr etc. */
- x = crtl->drap_reg;
- x = gen_frame_mem (Pmode,
- plus_constant (x, -UNITS_PER_WORD));
- insn = emit_insn (gen_push (x));
+ t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
+ t = gen_frame_mem (Pmode, t);
+ insn = emit_insn (gen_push (t));
RTX_FRAME_RELATED_P (insn) = 1;
- }
- /* Note: AT&T enter does NOT have reversed args. Enter is probably
- slower on all targets. Also sdb doesn't like it. */
+ /* For the purposes of frame and register save area addressing,
+ we've started over with a new frame. */
+ m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
+ m->fs.realigned = true;
+ }
- if (gen_frame_pointer)
+ if (frame_pointer_needed && !m->fs.fp_valid)
{
+ /* Note: AT&T enter does NOT have reversed args. Enter is probably
+ slower on all targets. Also sdb doesn't like it. */
insn = emit_insn (gen_push (hard_frame_pointer_rtx));
RTX_FRAME_RELATED_P (insn) = 1;
insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
RTX_FRAME_RELATED_P (insn) = 1;
- if (ix86_cfa_state->reg == stack_pointer_rtx)
- ix86_cfa_state->reg = hard_frame_pointer_rtx;
+ if (m->fs.cfa_reg == stack_pointer_rtx)
+ m->fs.cfa_reg = hard_frame_pointer_rtx;
+ gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
+ m->fs.fp_offset = m->fs.sp_offset;
+ m->fs.fp_valid = true;
+ }
+
+ int_registers_saved = (frame.nregs == 0);
+
+ if (!int_registers_saved)
+ {
+ /* If saving registers via PUSH, do so now. */
+ if (!frame.save_regs_using_mov)
+ {
+ ix86_emit_save_regs ();
+ int_registers_saved = true;
+ gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
+ }
+
+ /* When using red zone we may start register saving before allocating
+ the stack frame saving one cycle of the prologue. However, avoid
+ doing this if we have to probe the stack; at least on x86_64 the
+ stack probe can turn into a call that clobbers a red zone location. */
+ else if (ix86_using_red_zone ()
+ && (! TARGET_STACK_PROBE
+ || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
+ {
+ ix86_emit_save_regs_using_mov (frame.reg_save_offset);
+ int_registers_saved = true;
+ }
}
if (stack_realign_fp)
int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
+ /* The computation of the size of the re-aligned stack frame means
+ that we must allocate the size of the register save area before
+ performing the actual alignment. Otherwise we cannot guarantee
+ that there's enough storage above the realignment point. */
+ if (m->fs.sp_offset != frame.sse_reg_save_offset)
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (m->fs.sp_offset
+ - frame.sse_reg_save_offset),
+ -1, false);
+
/* Align the stack. */
- insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
- stack_pointer_rtx,
- GEN_INT (-align_bytes)));
- RTX_FRAME_RELATED_P (insn) = 1;
+ insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-align_bytes)));
+
+ /* For the purposes of register save area addressing, the stack
+ pointer is no longer valid. As for the value of sp_offset,
+ see ix86_compute_frame_layout, which we need to match in order
+ to pass verification of stack_pointer_offset at the end. */
+ m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
+ m->fs.sp_valid = false;
}
- allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
+ allocate = frame.stack_pointer_offset - m->fs.sp_offset;
- if (!frame.save_regs_using_mov)
- ix86_emit_save_regs ();
- else
- allocate += frame.nregs * UNITS_PER_WORD;
-
- /* When using red zone we may start register saving before allocating
- the stack frame saving one cycle of the prologue. However I will
- avoid doing this if I am going to have to probe the stack since
- at least on x86_64 the stack probe can turn into a call that clobbers
- a red zone location */
- if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
- && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
- ix86_emit_save_regs_using_mov ((frame_pointer_needed
- && !crtl->stack_realign_needed)
- ? hard_frame_pointer_rtx
- : stack_pointer_rtx,
- -frame.nregs * UNITS_PER_WORD);
+ if (flag_stack_usage)
+ {
+ /* We start to count from ARG_POINTER. */
+ HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
+
+ /* If it was realigned, take into account the fake frame. */
+ if (stack_realign_drap)
+ {
+ if (ix86_static_chain_on_stack)
+ stack_size += UNITS_PER_WORD;
+
+ if (!call_used_regs[REGNO (crtl->drap_reg)])
+ stack_size += UNITS_PER_WORD;
+
+ /* This over-estimates by 1 minimal-stack-alignment-unit but
+ mitigates that by counting in the new return address slot. */
+ current_function_dynamic_stack_size
+ += crtl->stack_alignment_needed / BITS_PER_UNIT;
+ }
+
+ current_function_static_stack_size = stack_size;
+ }
+
+ /* The stack has already been decremented by the instruction calling us
+ so we need to probe unconditionally to preserve the protection area. */
+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+ {
+ /* We expect the registers to be saved when probes are used. */
+ gcc_assert (int_registers_saved);
+
+ if (STACK_CHECK_MOVING_SP)
+ {
+ ix86_adjust_stack_and_probe (allocate);
+ allocate = 0;
+ }
+ else
+ {
+ HOST_WIDE_INT size = allocate;
+
+ if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
+ size = 0x80000000 - STACK_CHECK_PROTECT - 1;
+
+ if (TARGET_STACK_PROBE)
+ ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
+ else
+ ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+ }
+ }
if (allocate == 0)
;
- else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (-allocate), -1,
- ix86_cfa_state->reg == stack_pointer_rtx);
+ else if (!ix86_target_stack_probe ()
+ || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
+ {
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-allocate), -1,
+ m->fs.cfa_reg == stack_pointer_rtx);
+ }
else
{
rtx eax = gen_rtx_REG (Pmode, AX_REG);
bool eax_live;
- rtx t;
if (cfun->machine->call_abi == MS_ABI)
eax_live = false;
emit_move_insn (eax, GEN_INT (allocate));
- if (TARGET_64BIT)
- insn = gen_allocate_stack_worker_64 (eax, eax);
- else
- insn = gen_allocate_stack_worker_32 (eax, eax);
- insn = emit_insn (insn);
+ insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
- if (ix86_cfa_state->reg == stack_pointer_rtx)
+ if (m->fs.cfa_reg == stack_pointer_rtx)
{
- ix86_cfa_state->offset += allocate;
+ m->fs.cfa_offset += allocate;
t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
RTX_FRAME_RELATED_P (insn) = 1;
}
+ m->fs.sp_offset += allocate;
if (eax_live)
{
- if (frame_pointer_needed)
- t = plus_constant (hard_frame_pointer_rtx,
- allocate
- - frame.to_allocate
- - frame.nregs * UNITS_PER_WORD);
- else
- t = plus_constant (stack_pointer_rtx, allocate);
- emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
+ t = choose_baseaddr (m->fs.sp_offset - allocate);
+ emit_move_insn (eax, gen_frame_mem (Pmode, t));
}
}
+ gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
- if (frame.save_regs_using_mov
- && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
- && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
- {
- if (!frame_pointer_needed
- || !(frame.to_allocate + frame.padding0)
- || crtl->stack_realign_needed)
- ix86_emit_save_regs_using_mov (stack_pointer_rtx,
- frame.to_allocate
- + frame.nsseregs * 16 + frame.padding0);
- else
- ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
- -frame.nregs * UNITS_PER_WORD);
- }
- if (!frame_pointer_needed
- || !(frame.to_allocate + frame.padding0)
- || crtl->stack_realign_needed)
- ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
- frame.to_allocate);
- else
- ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
- - frame.nregs * UNITS_PER_WORD
- - frame.nsseregs * 16
- - frame.padding0);
+ if (!int_registers_saved)
+ ix86_emit_save_regs_using_mov (frame.reg_save_offset);
+ if (frame.nsseregs)
+ ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
pic_reg_used = false;
if (pic_offset_table_rtx
when mcount needs it. Blockage to avoid call movement across mcount
call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
note. */
- if (crtl->profile && pic_reg_used)
+ if (crtl->profile && !flag_fentry && pic_reg_used)
emit_insn (gen_prologue_use (pic_offset_table_rtx));
if (crtl->drap_reg && !crtl->stack_realign_needed)
/* vDRAP is setup but after reload it turns out stack realign
isn't necessary, here we will emit prologue to setup DRAP
without stack realign adjustment */
- rtx x;
- int drap_bp_offset = UNITS_PER_WORD * 2;
-
- if (ix86_static_chain_on_stack)
- drap_bp_offset += UNITS_PER_WORD;
- x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
- insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
+ t = choose_baseaddr (0);
+ emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
}
/* Prevent instructions from being scheduled into register save push
/* Emit code to restore REG using a POP insn. */
static void
-ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
+ix86_emit_restore_reg_using_pop (rtx reg)
{
- rtx insn = emit_insn (ix86_gen_pop1 (reg));
+ struct machine_function *m = cfun->machine;
+ rtx insn = emit_insn (gen_pop (reg));
+
+ ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
+ m->fs.sp_offset -= UNITS_PER_WORD;
- if (ix86_cfa_state->reg == crtl->drap_reg
+ if (m->fs.cfa_reg == crtl->drap_reg
&& REGNO (reg) == REGNO (crtl->drap_reg))
{
/* Previously we'd represented the CFA as an expression
the stack pointer. */
add_reg_note (insn, REG_CFA_DEF_CFA, reg);
RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* This means that the DRAP register is valid for addressing too. */
+ m->fs.drap_valid = true;
return;
}
- if (ix86_cfa_state->reg == stack_pointer_rtx)
+ if (m->fs.cfa_reg == stack_pointer_rtx)
{
- ix86_cfa_state->offset -= UNITS_PER_WORD;
- add_reg_note (insn, REG_CFA_ADJUST_CFA,
- copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
+ rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
+ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
RTX_FRAME_RELATED_P (insn) = 1;
+
+ m->fs.cfa_offset -= UNITS_PER_WORD;
}
/* When the frame pointer is the CFA, and we pop it, we are
for stack frames that don't allocate other data, so we assume
the stack pointer is now pointing at the return address, i.e.
the function entry state, which makes the offset be 1 word. */
- else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
- && reg == hard_frame_pointer_rtx)
+ if (reg == hard_frame_pointer_rtx)
{
- ix86_cfa_state->reg = stack_pointer_rtx;
- ix86_cfa_state->offset -= UNITS_PER_WORD;
+ m->fs.fp_valid = false;
+ if (m->fs.cfa_reg == hard_frame_pointer_rtx)
+ {
+ m->fs.cfa_reg = stack_pointer_rtx;
+ m->fs.cfa_offset -= UNITS_PER_WORD;
- add_reg_note (insn, REG_CFA_DEF_CFA,
- gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- GEN_INT (ix86_cfa_state->offset)));
- RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (m->fs.cfa_offset)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
}
-
- ix86_add_cfa_restore_note (insn, reg, red_offset);
}
/* Emit code to restore saved registers using POP insns. */
static void
-ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
+ix86_emit_restore_regs_using_pop (void)
{
- int regno;
+ unsigned int regno;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
- {
- ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
- red_offset);
- red_offset += UNITS_PER_WORD;
- }
+ ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
}
/* Emit code and notes for the LEAVE instruction. */
static void
-ix86_emit_leave (HOST_WIDE_INT red_offset)
+ix86_emit_leave (void)
{
+ struct machine_function *m = cfun->machine;
rtx insn = emit_insn (ix86_gen_leave ());
ix86_add_queued_cfa_restore_notes (insn);
- if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
+ gcc_assert (m->fs.fp_valid);
+ m->fs.sp_valid = true;
+ m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
+ m->fs.fp_valid = false;
+
+ if (m->fs.cfa_reg == hard_frame_pointer_rtx)
{
- ix86_cfa_state->reg = stack_pointer_rtx;
- ix86_cfa_state->offset -= UNITS_PER_WORD;
+ m->fs.cfa_reg = stack_pointer_rtx;
+ m->fs.cfa_offset = m->fs.sp_offset;
- add_reg_note (insn, REG_CFA_ADJUST_CFA,
- copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (stack_pointer_rtx, m->fs.sp_offset));
RTX_FRAME_RELATED_P (insn) = 1;
- ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
+ ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
+ m->fs.fp_offset);
}
}
-/* Emit code to restore saved registers using MOV insns. First register
- is restored from POINTER + OFFSET. */
+/* Emit code to restore saved registers using MOV insns.
+ First register is restored from CFA - CFA_OFFSET. */
static void
-ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
- HOST_WIDE_INT red_offset,
+ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
int maybe_eh_return)
{
+ struct machine_function *m = cfun->machine;
unsigned int regno;
- rtx base_address = gen_rtx_MEM (Pmode, pointer);
- rtx insn;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
{
rtx reg = gen_rtx_REG (Pmode, regno);
+ rtx insn, mem;
+
+ mem = choose_baseaddr (cfa_offset);
+ mem = gen_frame_mem (Pmode, mem);
+ insn = emit_move_insn (reg, mem);
- /* Ensure that adjust_address won't be forced to produce pointer
- out of range allowed by x86-64 instruction set. */
- if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
- {
- rtx r11;
-
- r11 = gen_rtx_REG (DImode, R11_REG);
- emit_move_insn (r11, GEN_INT (offset));
- emit_insn (gen_adddi3 (r11, r11, pointer));
- base_address = gen_rtx_MEM (Pmode, r11);
- offset = 0;
- }
- insn = emit_move_insn (reg,
- adjust_address (base_address, Pmode, offset));
- offset += UNITS_PER_WORD;
-
- if (ix86_cfa_state->reg == crtl->drap_reg
- && regno == REGNO (crtl->drap_reg))
+ if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
{
/* Previously we'd represented the CFA as an expression
like *(%ebp - 8). We've just popped that value from
the stack pointer. */
add_reg_note (insn, REG_CFA_DEF_CFA, reg);
RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* This means that the DRAP register is valid for addressing. */
+ m->fs.drap_valid = true;
}
else
- ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
+ ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
- red_offset += UNITS_PER_WORD;
+ cfa_offset -= UNITS_PER_WORD;
}
}
-/* Emit code to restore saved registers using MOV insns. First register
- is restored from POINTER + OFFSET. */
+/* Emit code to restore saved registers using MOV insns.
+ First register is restored from CFA - CFA_OFFSET. */
static void
-ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
- HOST_WIDE_INT red_offset,
+ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
int maybe_eh_return)
{
- int regno;
- rtx base_address = gen_rtx_MEM (TImode, pointer);
- rtx mem;
+ unsigned int regno;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
{
- rtx reg = gen_rtx_REG (TImode, regno);
-
- /* Ensure that adjust_address won't be forced to produce pointer
- out of range allowed by x86-64 instruction set. */
- if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
- {
- rtx r11;
+ rtx reg = gen_rtx_REG (V4SFmode, regno);
+ rtx mem;
- r11 = gen_rtx_REG (DImode, R11_REG);
- emit_move_insn (r11, GEN_INT (offset));
- emit_insn (gen_adddi3 (r11, r11, pointer));
- base_address = gen_rtx_MEM (TImode, r11);
- offset = 0;
- }
- mem = adjust_address (base_address, TImode, offset);
+ mem = choose_baseaddr (cfa_offset);
+ mem = gen_rtx_MEM (V4SFmode, mem);
set_mem_align (mem, 128);
emit_move_insn (reg, mem);
- offset += 16;
- ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
+ ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
- red_offset += 16;
+ cfa_offset -= 16;
}
}
void
ix86_expand_epilogue (int style)
{
- int sp_valid;
+ struct machine_function *m = cfun->machine;
+ struct machine_frame_state frame_state_save = m->fs;
struct ix86_frame frame;
- HOST_WIDE_INT offset, red_offset;
- struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
+ bool restore_regs_via_mov;
bool using_drap;
ix86_finalize_stack_realign_flags ();
+ ix86_compute_frame_layout (&frame);
- /* When stack is realigned, SP must be valid. */
- sp_valid = (!frame_pointer_needed
- || current_function_sp_is_unchanging
- || stack_realign_fp);
+ m->fs.sp_valid = (!frame_pointer_needed
+ || (current_function_sp_is_unchanging
+ && !stack_realign_fp));
+ gcc_assert (!m->fs.sp_valid
+ || m->fs.sp_offset == frame.stack_pointer_offset);
- ix86_compute_frame_layout (&frame);
+ /* The FP must be valid if the frame pointer is present. */
+ gcc_assert (frame_pointer_needed == m->fs.fp_valid);
+ gcc_assert (!m->fs.fp_valid
+ || m->fs.fp_offset == frame.hard_frame_pointer_offset);
+
+ /* We must have *some* valid pointer to the stack frame. */
+ gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
+
+ /* The DRAP is never valid at this point. */
+ gcc_assert (!m->fs.drap_valid);
/* See the comment about red zone and frame
pointer usage in ix86_expand_prologue. */
if (frame_pointer_needed && frame.red_zone_size)
- emit_insn (gen_memory_blockage ());
+ emit_insn (gen_memory_blockage ());
using_drap = crtl->drap_reg && crtl->stack_realign_needed;
- gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
+ gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
+
+ /* Determine the CFA offset of the end of the red-zone. */
+ m->fs.red_zone_offset = 0;
+ if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
+ {
+ /* The red-zone begins below the return address. */
+ m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
- /* Calculate start of saved registers relative to ebp. Special care
- must be taken for the normal return case of a function using
- eh_return: the eax and edx registers are marked as saved, but not
- restored along this path. */
- offset = frame.nregs;
+ /* When the register save area is in the aligned portion of
+ the stack, determine the maximum runtime displacement that
+ matches up with the aligned frame. */
+ if (stack_realign_drap)
+ m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
+ + UNITS_PER_WORD);
+ }
+
+ /* Special care must be taken for the normal return case of a function
+ using eh_return: the eax and edx registers are marked as saved, but
+ not restored along this path. Adjust the save location to match. */
if (crtl->calls_eh_return && style != 2)
- offset -= 2;
- offset *= -UNITS_PER_WORD;
- offset -= frame.nsseregs * 16 + frame.padding0;
-
- /* Calculate start of saved registers relative to esp on entry of the
- function. When realigning stack, this needs to be the most negative
- value possible at runtime. */
- red_offset = offset;
- if (using_drap)
- red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
- + UNITS_PER_WORD;
- else if (stack_realign_fp)
- red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
- - UNITS_PER_WORD;
- if (ix86_static_chain_on_stack)
- red_offset -= UNITS_PER_WORD;
- if (frame_pointer_needed)
- red_offset -= UNITS_PER_WORD;
+ frame.reg_save_offset -= 2 * UNITS_PER_WORD;
/* If we're only restoring one register and sp is not valid then
using a move instruction to restore the register since it's
- less work than reloading sp and popping the register.
-
- The default code result in stack adjustment using add/lea instruction,
- while this code results in LEAVE instruction (or discrete equivalent),
- so it is profitable in some other cases as well. Especially when there
- are no registers to restore. We also use this code when TARGET_USE_LEAVE
- and there is exactly one register to pop. This heuristic may need some
- tuning in future. */
- if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
- || (TARGET_EPILOGUE_USING_MOVE
- && cfun->machine->use_fast_prologue_epilogue
- && ((frame.nregs + frame.nsseregs) > 1
- || (frame.to_allocate + frame.padding0) != 0))
- || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
- && (frame.to_allocate + frame.padding0) != 0)
- || (frame_pointer_needed && TARGET_USE_LEAVE
- && cfun->machine->use_fast_prologue_epilogue
- && (frame.nregs + frame.nsseregs) == 1)
- || crtl->calls_eh_return)
- {
- /* Restore registers. We can use ebp or esp to address the memory
- locations. If both are available, default to ebp, since offsets
- are known to be small. Only exception is esp pointing directly
- to the end of block of saved registers, where we may simplify
- addressing mode.
-
- If we are realigning stack with bp and sp, regs restore can't
- be addressed by bp. sp must be used instead. */
-
- if (!frame_pointer_needed
- || (sp_valid && !(frame.to_allocate + frame.padding0))
- || stack_realign_fp)
- {
- ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
- frame.to_allocate, red_offset,
- style == 2);
- ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
- frame.to_allocate
- + frame.nsseregs * 16
- + frame.padding0,
- red_offset
- + frame.nsseregs * 16
- + frame.padding0, style == 2);
+ less work than reloading sp and popping the register. */
+ if (!m->fs.sp_valid && frame.nregs <= 1)
+ restore_regs_via_mov = true;
+ /* EH_RETURN requires the use of moves to function properly. */
+ else if (crtl->calls_eh_return)
+ restore_regs_via_mov = true;
+ else if (TARGET_EPILOGUE_USING_MOVE
+ && cfun->machine->use_fast_prologue_epilogue
+ && (frame.nregs > 1
+ || m->fs.sp_offset != frame.reg_save_offset))
+ restore_regs_via_mov = true;
+ else if (frame_pointer_needed
+ && !frame.nregs
+ && m->fs.sp_offset != frame.reg_save_offset)
+ restore_regs_via_mov = true;
+ else if (frame_pointer_needed
+ && TARGET_USE_LEAVE
+ && cfun->machine->use_fast_prologue_epilogue
+ && frame.nregs == 1)
+ restore_regs_via_mov = true;
+ else
+ restore_regs_via_mov = false;
+
+ if (restore_regs_via_mov || frame.nsseregs)
+ {
+ /* Ensure that the entire register save area is addressable via
+ the stack pointer, if we will restore via sp. */
+ if (TARGET_64BIT
+ && m->fs.sp_offset > 0x7fffffff
+ && !(m->fs.fp_valid || m->fs.drap_valid)
+ && (frame.nsseregs + frame.nregs) != 0)
+ {
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (m->fs.sp_offset
+ - frame.sse_reg_save_offset),
+ style,
+ m->fs.cfa_reg == stack_pointer_rtx);
}
- else
- {
- ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
- offset, red_offset,
- style == 2);
- ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
- offset
- + frame.nsseregs * 16
- + frame.padding0,
- red_offset
- + frame.nsseregs * 16
- + frame.padding0, style == 2);
- }
+ }
+
+ /* If there are any SSE registers to restore, then we have to do it
+ via moves, since there's obviously no pop for SSE regs. */
+ if (frame.nsseregs)
+ ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
+ style == 2);
+
+ if (restore_regs_via_mov)
+ {
+ rtx t;
- red_offset -= offset;
+ if (frame.nregs)
+ ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
/* eh_return epilogues need %ecx added to the stack pointer. */
if (style == 2)
{
- rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
+ rtx insn, sa = EH_RETURN_STACKADJ_RTX;
/* Stack align doesn't work with eh_return. */
- gcc_assert (!crtl->stack_realign_needed);
+ gcc_assert (!stack_realign_drap);
/* Neither does regparm nested functions. */
gcc_assert (!ix86_static_chain_on_stack);
if (frame_pointer_needed)
{
- tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
- tmp = plus_constant (tmp, UNITS_PER_WORD);
- tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
+ t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
+ t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
+ emit_insn (gen_rtx_SET (VOIDmode, sa, t));
- tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
- tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
+ t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
+ insn = emit_move_insn (hard_frame_pointer_rtx, t);
/* Note that we use SA as a temporary CFA, as the return
address is at the proper place relative to it. We
other reasonable register to use for the CFA. We don't
bother resetting the CFA to the SP for the duration of
the return insn. */
- add_reg_note (tmp, REG_CFA_DEF_CFA,
+ add_reg_note (insn, REG_CFA_DEF_CFA,
plus_constant (sa, UNITS_PER_WORD));
- ix86_add_queued_cfa_restore_notes (tmp);
- add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
- RTX_FRAME_RELATED_P (tmp) = 1;
- ix86_cfa_state->reg = sa;
- ix86_cfa_state->offset = UNITS_PER_WORD;
+ ix86_add_queued_cfa_restore_notes (insn);
+ add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ m->fs.cfa_reg = sa;
+ m->fs.cfa_offset = UNITS_PER_WORD;
+ m->fs.fp_valid = false;
pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
const0_rtx, style, false);
}
else
{
- tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
- tmp = plus_constant (tmp, (frame.to_allocate
- + frame.nregs * UNITS_PER_WORD
- + frame.nsseregs * 16
- + frame.padding0));
- tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
- ix86_add_queued_cfa_restore_notes (tmp);
-
- gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
- if (ix86_cfa_state->offset != UNITS_PER_WORD)
+ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
+ t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
+ insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
+ ix86_add_queued_cfa_restore_notes (insn);
+
+ gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
+ if (m->fs.cfa_offset != UNITS_PER_WORD)
{
- ix86_cfa_state->offset = UNITS_PER_WORD;
- add_reg_note (tmp, REG_CFA_DEF_CFA,
+ m->fs.cfa_offset = UNITS_PER_WORD;
+ add_reg_note (insn, REG_CFA_DEF_CFA,
plus_constant (stack_pointer_rtx,
UNITS_PER_WORD));
- RTX_FRAME_RELATED_P (tmp) = 1;
+ RTX_FRAME_RELATED_P (insn) = 1;
}
}
- }
- else if (!frame_pointer_needed)
- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (frame.to_allocate
- + frame.nregs * UNITS_PER_WORD
- + frame.nsseregs * 16
- + frame.padding0),
- style, !using_drap);
- /* If not an i386, mov & pop is faster than "leave". */
- else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
- || !cfun->machine->use_fast_prologue_epilogue)
- ix86_emit_leave (red_offset);
- else
- {
- pro_epilogue_adjust_stack (stack_pointer_rtx,
- hard_frame_pointer_rtx,
- const0_rtx, style, !using_drap);
-
- ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
+ m->fs.sp_offset = UNITS_PER_WORD;
+ m->fs.sp_valid = true;
}
}
else
{
/* First step is to deallocate the stack frame so that we can
- pop the registers.
-
- If we realign stack with frame pointer, then stack pointer
- won't be able to recover via lea $offset(%bp), %sp, because
- there is a padding area between bp and sp for realign.
- "add $to_allocate, %sp" must be used instead. */
- if (!sp_valid)
+ pop the registers. */
+ if (!m->fs.sp_valid)
{
- gcc_assert (frame_pointer_needed);
- gcc_assert (!stack_realign_fp);
- pro_epilogue_adjust_stack (stack_pointer_rtx,
- hard_frame_pointer_rtx,
- GEN_INT (offset), style, false);
- ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
- 0, red_offset,
- style == 2);
- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (frame.nsseregs * 16
- + frame.padding0),
+ pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
+ GEN_INT (m->fs.fp_offset
+ - frame.reg_save_offset),
style, false);
}
- else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
+ else if (m->fs.sp_offset != frame.reg_save_offset)
{
- ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
- frame.to_allocate, red_offset,
- style == 2);
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (frame.to_allocate
- + frame.nsseregs * 16
- + frame.padding0), style,
- !using_drap && !frame_pointer_needed);
+ GEN_INT (m->fs.sp_offset
+ - frame.reg_save_offset),
+ style,
+ m->fs.cfa_reg == stack_pointer_rtx);
}
- ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
- + frame.padding0);
- red_offset -= offset;
+ ix86_emit_restore_regs_using_pop ();
+ }
- if (frame_pointer_needed)
- {
- /* Leave results in shorter dependency chains on CPUs that are
- able to grok it fast. */
- if (TARGET_USE_LEAVE)
- ix86_emit_leave (red_offset);
- else
- {
- /* For stack realigned really happens, recover stack
- pointer to hard frame pointer is a must, if not using
- leave. */
- if (stack_realign_fp)
- pro_epilogue_adjust_stack (stack_pointer_rtx,
- hard_frame_pointer_rtx,
- const0_rtx, style, !using_drap);
- ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
- red_offset);
- }
- }
+ /* If we used a frame pointer and haven't already got rid of it,
+ then do so now. */
+ if (m->fs.fp_valid)
+ {
+ /* If the stack pointer is valid and pointing at the frame
+ pointer store address, then we only need a pop. */
+ if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
+ ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
+ /* Leave results in shorter dependency chains on CPUs that are
+ able to grok it fast. */
+ else if (TARGET_USE_LEAVE
+ || optimize_function_for_size_p (cfun)
+ || !cfun->machine->use_fast_prologue_epilogue)
+ ix86_emit_leave ();
+ else
+ {
+ pro_epilogue_adjust_stack (stack_pointer_rtx,
+ hard_frame_pointer_rtx,
+ const0_rtx, style, !using_drap);
+ ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
+ }
}
if (using_drap)
if (!call_used_regs[REGNO (crtl->drap_reg)])
param_ptr_offset += UNITS_PER_WORD;
- insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
- crtl->drap_reg,
- GEN_INT (-param_ptr_offset)));
-
- ix86_cfa_state->reg = stack_pointer_rtx;
- ix86_cfa_state->offset = param_ptr_offset;
+ insn = emit_insn (gen_rtx_SET
+ (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode,
+ crtl->drap_reg,
+ GEN_INT (-param_ptr_offset))));
+ m->fs.cfa_reg = stack_pointer_rtx;
+ m->fs.cfa_offset = param_ptr_offset;
+ m->fs.sp_offset = param_ptr_offset;
+ m->fs.realigned = false;
add_reg_note (insn, REG_CFA_DEF_CFA,
- gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
- GEN_INT (ix86_cfa_state->offset)));
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (param_ptr_offset)));
RTX_FRAME_RELATED_P (insn) = 1;
if (!call_used_regs[REGNO (crtl->drap_reg)])
- ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
+ ix86_emit_restore_reg_using_pop (crtl->drap_reg);
}
- /* Remove the saved static chain from the stack. The use of ECX is
- merely as a scratch register, not as the actual static chain. */
- if (ix86_static_chain_on_stack)
+ /* At this point the stack pointer must be valid, and we must have
+ restored all of the registers. We may not have deallocated the
+ entire stack frame. We've delayed this until now because it may
+ be possible to merge the local stack deallocation with the
+ deallocation forced by ix86_static_chain_on_stack. */
+ gcc_assert (m->fs.sp_valid);
+ gcc_assert (!m->fs.fp_valid);
+ gcc_assert (!m->fs.realigned);
+ if (m->fs.sp_offset != UNITS_PER_WORD)
{
- rtx r, insn;
-
- gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
- ix86_cfa_state->offset += UNITS_PER_WORD;
-
- r = gen_rtx_REG (Pmode, CX_REG);
- insn = emit_insn (ix86_gen_pop1 (r));
-
- r = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
- r = gen_rtx_SET (VOIDmode, stack_pointer_rtx, r);
- add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
- RTX_FRAME_RELATED_P (insn) = 1;
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
+ style, true);
}
/* Sibcall epilogues don't want a return instruction. */
if (style == 0)
{
- *ix86_cfa_state = cfa_state_save;
+ m->fs = frame_state_save;
return;
}
/* There is no "pascal" calling convention in any 64bit ABI. */
gcc_assert (!TARGET_64BIT);
- insn = emit_insn (gen_popsi1 (ecx));
- ix86_cfa_state->offset -= UNITS_PER_WORD;
+ insn = emit_insn (gen_pop (ecx));
+ m->fs.cfa_offset -= UNITS_PER_WORD;
+ m->fs.sp_offset -= UNITS_PER_WORD;
add_reg_note (insn, REG_CFA_ADJUST_CFA,
copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
/* Restore the state back to the state from the prologue,
so that it's correct for the next epilogue. */
- *ix86_cfa_state = cfa_state_save;
+ m->fs = frame_state_save;
}
/* Reset from the function's potential modifications. */
to test cfun for being non-NULL. */
if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
&& base_reg && !index_reg && !disp
- && REG_P (base_reg)
- && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
+ && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
disp = const0_rtx;
/* Special case: encode reg+reg instead of reg*2. */
/* Determine if a given CONST RTX is a valid memory displacement
in PIC mode. */
-int
+bool
legitimate_pic_address_disp_p (rtx disp)
{
bool saw_plus;
}
}
if (GET_CODE (disp) != CONST)
- return 0;
+ return false;
disp = XEXP (disp, 0);
if (TARGET_64BIT)
|| (XINT (disp, 1) != UNSPEC_GOTPCREL
&& XINT (disp, 1) != UNSPEC_GOTOFF
&& XINT (disp, 1) != UNSPEC_PLTOFF))
- return 0;
+ return false;
if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
&& GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
- return 0;
- return 1;
+ return false;
+ return true;
}
saw_plus = false;
if (GET_CODE (disp) == PLUS)
{
if (!CONST_INT_P (XEXP (disp, 1)))
- return 0;
+ return false;
disp = XEXP (disp, 0);
saw_plus = true;
}
if (TARGET_MACHO && darwin_local_data_pic (disp))
- return 1;
+ return true;
if (GET_CODE (disp) != UNSPEC)
- return 0;
+ return false;
switch (XINT (disp, 1))
{
&& SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
}
- return 0;
+ return false;
}
/* Recognizes RTL expressions that are valid memory addresses for an
if (h)
return h->to;
- *loc = h = GGC_NEW (struct tree_map);
+ *loc = h = ggc_alloc_tree_map ();
h->hash = in.hash;
h->base.from = decl;
h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
}
}
- if (changed && ix86_legitimate_address_p (mode, x, FALSE))
+ if (changed && ix86_legitimate_address_p (mode, x, false))
return x;
if (GET_CODE (XEXP (x, 0)) == MULT)
x = legitimize_pic_address (x, 0);
}
- if (changed && ix86_legitimate_address_p (mode, x, FALSE))
+ if (changed && ix86_legitimate_address_p (mode, x, false))
return x;
if (REG_P (XEXP (x, 0)))
break;
case SYMBOL_REF:
- if (! TARGET_MACHO || TARGET_64BIT)
+ if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
output_addr_const (file, x);
else
{
}
else
/* We can't handle floating point constants;
- PRINT_OPERAND must handle them. */
+ TARGET_PRINT_OPERAND must handle them. */
output_operand_lossage ("floating constant misused");
break;
Y -- print condition for XOP pcom* instruction.
+ -- print a branch hint as 'cs' or 'ds' prefix
; -- print a semicolon (after prefixes due to bug in older gas).
+ @ -- print a segment register of thread base pointer load
*/
void
-print_operand (FILE *file, rtx x, int code)
+ix86_print_operand (FILE *file, rtx x, int code)
{
if (code)
{
if (!REG_P (x))
{
putc ('[', file);
- PRINT_OPERAND (file, x, 0);
+ ix86_print_operand (file, x, 0);
putc (']', file);
return;
}
gcc_unreachable ();
}
- PRINT_OPERAND (file, x, 0);
+ ix86_print_operand (file, x, 0);
return;
output_operand_lossage
("invalid operand size for operand code '%c'", code);
return;
-
+
case 'd':
case 'b':
case 'w':
case 's':
if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
{
- PRINT_OPERAND (file, x, 0);
+ ix86_print_operand (file, x, 0);
fputs (", ", file);
}
return;
return;
case ';':
-#if TARGET_MACHO || !HAVE_AS_IX86_REP_LOCK_PREFIX
- fputs (";", file);
+#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
+ putc (';', file);
#endif
return;
+ case '@':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('%', file);
+
+ /* The kernel uses a different segment register for performance
+ reasons; a system call would not have to trash the userspace
+ segment register, which would be expensive. */
+ if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
+ fputs ("fs", file);
+ else
+ fputs ("gs", file);
+ return;
+
default:
output_operand_lossage ("invalid operand code '%c'", code);
}
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('$', file);
- fprintf (file, "0x%08lx", (long unsigned int) l);
+ /* Sign extend 32bit SFmode immediate to 8 bytes. */
+ if (code == 'q')
+ fprintf (file, "0x%08llx", (unsigned long long) (int) l);
+ else
+ fprintf (file, "0x%08x", (unsigned int) l);
}
/* These float cases don't actually occur as immediate operands. */
output_addr_const (file, x);
}
}
+/* Return true if CODE is '@', '*', '+', '&' or ';'.  */
+static bool
+ix86_print_operand_punct_valid_p (unsigned char code)
+{
+ return (code == '@' || code == '*' || code == '+'
+ || code == '&' || code == ';');
+}
\f
/* Print a memory operand whose address is ADDR. */
-void
-print_operand_address (FILE *file, rtx addr)
+static void
+ix86_print_operand_address (FILE *file, rtx addr)
{
struct ix86_address parts;
rtx base, index, disp;
}
}
-bool
-output_addr_const_extra (FILE *file, rtx x)
+/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
+
+static bool
+i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
rtx op;
gcc_assert (!TARGET_64BIT);
#endif
- fprintf (file, "%s" LPREFIX "%d\n", directive, value);
+ fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}
void
#endif
/* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
if (TARGET_64BIT || TARGET_VXWORKS_RTP)
- fprintf (file, "%s" LPREFIX "%d-" LPREFIX "%d\n",
- directive, value, rel);
+ fprintf (file, "%s%s%d-%s%d\n",
+ directive, LPREFIX, value, LPREFIX, rel);
else if (HAVE_AS_GOTOFF_IN_DATA)
- fprintf (file, ASM_LONG LPREFIX "%d@GOTOFF\n", value);
+ fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
else if (TARGET_MACHO)
{
- fprintf (file, ASM_LONG LPREFIX "%d-", value);
+ fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
machopic_output_function_base_name (file);
putc ('\n', file);
}
#endif
else
- asm_fprintf (file, ASM_LONG "%U%s+[.-" LPREFIX "%d]\n",
- GOT_SYMBOL_NAME, value);
+ asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
+ GOT_SYMBOL_NAME, LPREFIX, value);
}
\f
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
op1 = gen_lowpart (mode, op1);
switch (mode)
- {
+ {
case V4SFmode:
emit_insn (gen_avx_movups (op0, op1));
break;
if (MEM_P (op1))
{
/* If we're optimizing for size, movups is the smallest. */
- if (optimize_insn_for_size_p ()
+ if (optimize_insn_for_size_p ()
|| TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
{
op0 = gen_lowpart (V4SFmode, op0);
{
op0 = gen_lowpart (V2DFmode, op0);
op1 = gen_lowpart (V2DFmode, op1);
- emit_insn (gen_sse2_movupd (op0, op1));
+ emit_insn (gen_sse2_movupd (op0, op1));
}
else
{
if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
{
op0 = gen_lowpart (V4SFmode, op0);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_movups (op0, op1));
}
else
{
/* Return TRUE or FALSE depending on whether the binary operator meets the
appropriate constraints. */
-int
+bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
rtx operands[3])
{
/* Both source operands cannot be in memory. */
if (MEM_P (src1) && MEM_P (src2))
- return 0;
+ return false;
/* Canonicalize operand order for commutative operators. */
if (ix86_swap_binary_operands_p (code, mode, operands))
/* If the destination is memory, we must have a matching source operand. */
if (MEM_P (dst) && !rtx_equal_p (dst, src1))
- return 0;
+ return false;
/* Source 1 cannot be a constant. */
if (CONSTANT_P (src1))
- return 0;
+ return false;
/* Source 1 cannot be a non-matching memory. */
if (MEM_P (src1) && !rtx_equal_p (dst, src1))
- return 0;
+ return false;
- return 1;
+ return true;
}
/* Attempt to expand a unary operator. Make the expansion closer to the
#define LEA_SEARCH_THRESHOLD 12
/* Search backward for non-agu definition of register number REGNO1
- or register number REGNO2 in INSN's basic block until
+ or register number REGNO2 in INSN's basic block until
1. Pass LEA_SEARCH_THRESHOLD instructions, or
2. Reach BB boundary, or
3. Reach agu definition.
prev = PREV_INSN (prev);
}
}
-
+
if (distance < LEA_SEARCH_THRESHOLD)
{
edge e;
edge_iterator ei;
bool simple_loop = false;
-
+
FOR_EACH_EDGE (e, ei, bb->preds)
if (e->src == bb)
{
simple_loop = true;
break;
}
-
+
if (simple_loop)
{
rtx prev = BB_END (bb);
return distance;
}
-/* Return the distance between INSN and the next insn that uses
+/* Return the distance between INSN and the next insn that uses
register number REGNO0 in memory address. Return -1 if no such
a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
edge e;
edge_iterator ei;
bool simple_loop = false;
-
+
FOR_EACH_EDGE (e, ei, bb->succs)
if (e->dest == bb)
{
simple_loop = true;
break;
}
-
+
if (simple_loop)
{
rtx next = BB_HEAD (bb);
next = NEXT_INSN (next);
}
}
- }
+ }
return -1;
}
#define IX86_LEA_PRIORITY 2
/* Return true if it is ok to optimize an ADD operation to LEA
- operation to avoid flag register consumation. For the processors
- like ATOM, if the destination register of LEA holds an actual
- address which will be used soon, LEA is better and otherwise ADD
- is better. */
+ operation to avoid flag register consumption. For most processors,
+ ADD is faster than LEA. For the processors like ATOM, if the
+ destination register of LEA holds an actual address which will be
+ used soon, LEA is better and otherwise ADD is better. */
bool
-ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
- rtx insn, rtx operands[])
+ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
unsigned int regno0 = true_regnum (operands[0]);
unsigned int regno1 = true_regnum (operands[1]);
- unsigned int regno2;
-
- if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
- return regno0 != regno1;
-
- regno2 = true_regnum (operands[2]);
+ unsigned int regno2 = true_regnum (operands[2]);
/* If a = b + c, (a!=b && a!=c), must use lea form. */
if (regno0 != regno1 && regno0 != regno2)
return true;
- else
+
+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
+ return false;
+ else
{
int dist_define, dist_use;
dist_define = distance_non_agu_define (regno1, regno2, insn);
break;
}
- if (shift_rtx
+ if (shift_rtx
&& (GET_CODE (shift_rtx) == ASHIFT
|| GET_CODE (shift_rtx) == LSHIFTRT
|| GET_CODE (shift_rtx) == ASHIFTRT
/* Return TRUE or FALSE depending on whether the unary operator meets the
appropriate constraints. */
-int
+bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
enum machine_mode mode ATTRIBUTE_UNUSED,
rtx operands[2] ATTRIBUTE_UNUSED)
if ((MEM_P (operands[0])
|| MEM_P (operands[1]))
&& ! rtx_equal_p (operands[0], operands[1]))
- return FALSE;
- return TRUE;
+ return false;
+ return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
has source and destination with matching CC modes, and that the
CC mode is at least as constrained as REQ_MODE. */
-int
+bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
rtx set;
if (req_mode != CCNOmode
&& (req_mode != CCmode
|| XEXP (SET_SRC (set), 1) != const0_rtx))
- return 0;
+ return false;
break;
case CCmode:
if (req_mode == CCGCmode)
- return 0;
+ return false;
/* FALLTHRU */
case CCGCmode:
if (req_mode == CCGOCmode || req_mode == CCNOmode)
- return 0;
+ return false;
/* FALLTHRU */
case CCGOCmode:
if (req_mode == CCZmode)
- return 0;
+ return false;
/* FALLTHRU */
case CCAmode:
case CCCmode:
gcc_unreachable ();
}
- return (GET_MODE (SET_SRC (set)) == set_mode);
+ return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS. */
}
-/* Return a comparison we can do and that it is equivalent to
+/* Return a comparison we can do and that it is equivalent to
swap_condition (code) apart possibly from orderedness.
But, never change orderedness if TARGET_IEEE_FP, returning
UNKNOWN in that case if necessary. */
const0_rtx);
}
-rtx
-ix86_expand_compare (enum rtx_code code)
+static rtx
+ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
- rtx op0, op1, ret;
- op0 = ix86_compare_op0;
- op1 = ix86_compare_op1;
+ rtx ret;
- if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
- ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
+ ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
{
}
void
-ix86_expand_branch (enum rtx_code code, rtx label)
+ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
rtx tmp;
- switch (GET_MODE (ix86_compare_op0))
+ switch (GET_MODE (op0))
{
case SFmode:
case DFmode:
case HImode:
case SImode:
simple:
- tmp = ix86_expand_compare (code);
+ tmp = ix86_expand_compare (code, op0, op1);
tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
gen_rtx_LABEL_REF (VOIDmode, label),
pc_rtx);
enum rtx_code code1, code2, code3;
enum machine_mode submode;
- if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
+ if (CONSTANT_P (op0) && !CONSTANT_P (op1))
{
- tmp = ix86_compare_op0;
- ix86_compare_op0 = ix86_compare_op1;
- ix86_compare_op1 = tmp;
+ tmp = op0, op0 = op1, op1 = tmp;
code = swap_condition (code);
}
- if (GET_MODE (ix86_compare_op0) == DImode)
+ if (GET_MODE (op0) == DImode)
{
- split_di (&ix86_compare_op0, 1, lo+0, hi+0);
- split_di (&ix86_compare_op1, 1, lo+1, hi+1);
+ split_di (&op0, 1, lo+0, hi+0);
+ split_di (&op1, 1, lo+1, hi+1);
submode = SImode;
}
else
{
- split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
- split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
+ split_ti (&op0, 1, lo+0, hi+0);
+ split_ti (&op1, 1, lo+1, hi+1);
submode = DImode;
}
tmp = expand_binop (submode, ior_optab, xor1, xor0,
NULL_RTX, 0, OPTAB_WIDEN);
- ix86_compare_op0 = tmp;
- ix86_compare_op1 = const0_rtx;
- ix86_expand_branch (code, label);
+ ix86_expand_branch (code, tmp, const0_rtx, label);
return;
}
case LT: case LTU: case GE: case GEU:
if (lo[1] == const0_rtx)
{
- ix86_compare_op0 = hi[0];
- ix86_compare_op1 = hi[1];
- ix86_expand_branch (code, label);
+ ix86_expand_branch (code, hi[0], hi[1], label);
return;
}
break;
case LE: case LEU: case GT: case GTU:
if (lo[1] == constm1_rtx)
{
- ix86_compare_op0 = hi[0];
- ix86_compare_op1 = hi[1];
- ix86_expand_branch (code, label);
+ ix86_expand_branch (code, hi[0], hi[1], label);
return;
}
break;
* false:
*/
- ix86_compare_op0 = hi[0];
- ix86_compare_op1 = hi[1];
-
if (code1 != UNKNOWN)
- ix86_expand_branch (code1, label);
+ ix86_expand_branch (code1, hi[0], hi[1], label);
if (code2 != UNKNOWN)
- ix86_expand_branch (code2, label2);
+ ix86_expand_branch (code2, hi[0], hi[1], label2);
- ix86_compare_op0 = lo[0];
- ix86_compare_op1 = lo[1];
- ix86_expand_branch (code3, label);
+ ix86_expand_branch (code3, lo[0], lo[1], label);
if (code2 != UNKNOWN)
emit_label (label2);
}
default:
- /* If we have already emitted a compare insn, go straight to simple.
- ix86_expand_compare won't emit anything if ix86_compare_emitted
- is non NULL. */
- gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
+ gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
goto simple;
}
}
}
void
-ix86_expand_setcc (enum rtx_code code, rtx dest)
+ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
rtx ret;
gcc_assert (GET_MODE (dest) == QImode);
- ret = ix86_expand_compare (code);
+ ret = ix86_expand_compare (code, op0, op1);
PUT_MODE (ret, QImode);
emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}
return false;
op0 = force_reg (mode, op0);
}
- ix86_compare_op0 = op0;
- ix86_compare_op1 = op1;
- *pop = ix86_expand_compare (code);
+ *pop = ix86_expand_compare (code, op0, op1);
gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
return true;
}
-int
+bool
ix86_expand_int_movcc (rtx operands[])
{
enum rtx_code code = GET_CODE (operands[1]), compare_code;
rtx compare_seq, compare_op;
enum machine_mode mode = GET_MODE (operands[0]);
bool sign_bit_compare_p = false;
+ rtx op0 = XEXP (operands[1], 0);
+ rtx op1 = XEXP (operands[1], 1);
start_sequence ();
- ix86_compare_op0 = XEXP (operands[1], 0);
- ix86_compare_op1 = XEXP (operands[1], 1);
- compare_op = ix86_expand_compare (code);
+ compare_op = ix86_expand_compare (code, op0, op1);
compare_seq = get_insns ();
end_sequence ();
compare_code = GET_CODE (compare_op);
- if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
- || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
+ if ((op1 == const0_rtx && (code == GE || code == LT))
+ || (op1 == constm1_rtx && (code == GT || code == LE)))
sign_bit_compare_p = true;
/* Don't attempt mode expansion here -- if we had to expand 5 or 6
/* Sign bit compares are better done using shifts than we do by using
sbb. */
if (sign_bit_compare_p
- || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
- ix86_compare_op1, &compare_op))
+ || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
{
/* Detect overlap between destination and compare sources. */
rtx tmp = out;
}
diff = ct - cf;
- if (reg_overlap_mentioned_p (out, ix86_compare_op0)
- || reg_overlap_mentioned_p (out, ix86_compare_op1))
+ if (reg_overlap_mentioned_p (out, op0)
+ || reg_overlap_mentioned_p (out, op1))
tmp = gen_reg_rtx (mode);
if (mode == DImode)
cf = tmp;
diff = ct - cf;
}
- tmp = emit_store_flag (tmp, code, ix86_compare_op0,
- ix86_compare_op1, VOIDmode, 0, -1);
+ tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
}
if (diff == 1)
if (!rtx_equal_p (tmp, out))
emit_move_insn (copy_rtx (out), copy_rtx (tmp));
- return 1; /* DONE */
+ return true;
}
if (diff < 0)
{
- enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
+ enum machine_mode cmp_mode = GET_MODE (op0);
HOST_WIDE_INT tmp;
tmp = ct, ct = cf, cf = tmp;
}
compare_code = UNKNOWN;
- if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
- && CONST_INT_P (ix86_compare_op1))
+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
+ && CONST_INT_P (op1))
{
- if (ix86_compare_op1 == const0_rtx
+ if (op1 == const0_rtx
&& (code == LT || code == GE))
compare_code = code;
- else if (ix86_compare_op1 == constm1_rtx)
+ else if (op1 == constm1_rtx)
{
if (code == LE)
compare_code = LT;
/* Optimize dest = (op0 < 0) ? -1 : cf. */
if (compare_code != UNKNOWN
- && GET_MODE (ix86_compare_op0) == GET_MODE (out)
+ && GET_MODE (op0) == GET_MODE (out)
&& (cf == -1 || ct == -1))
{
/* If lea code below could be used, only optimize
code = reverse_condition (code);
}
- out = emit_store_flag (out, code, ix86_compare_op0,
- ix86_compare_op1, VOIDmode, 0, -1);
+ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
out = expand_simple_binop (mode, IOR,
out, GEN_INT (cf),
if (out != operands[0])
emit_move_insn (operands[0], out);
- return 1; /* DONE */
+ return true;
}
}
rtx tmp;
int nops;
- out = emit_store_flag (out, code, ix86_compare_op0,
- ix86_compare_op1, VOIDmode, 0, 1);
+ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
nops = 0;
/* On x86_64 the lea instruction operates on Pmode, so we need
if (!rtx_equal_p (out, operands[0]))
emit_move_insn (operands[0], copy_rtx (out));
- return 1; /* DONE */
+ return true;
}
/*
{
if (cf == 0)
{
- enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
+ enum machine_mode cmp_mode = GET_MODE (op0);
cf = ct;
ct = 0;
ct = tmp;
}
- out = emit_store_flag (out, code, ix86_compare_op0,
- ix86_compare_op1, VOIDmode, 0, -1);
+ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
}
else
{
- out = emit_store_flag (out, code, ix86_compare_op0,
- ix86_compare_op1, VOIDmode, 0, 1);
+ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
- out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
+ out = expand_simple_binop (mode, PLUS, copy_rtx (out),
+ constm1_rtx,
copy_rtx (out), 1, OPTAB_DIRECT);
}
if (!rtx_equal_p (out, operands[0]))
emit_move_insn (operands[0], copy_rtx (out));
- return 1; /* DONE */
+ return true;
}
}
rtx var, orig_out, out, tmp;
if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
- return 0; /* FAIL */
+ return false;
/* If one of the two operands is an interesting constant, load a
constant with the above and mask it in with a logical operation. */
else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
operands[3] = const0_rtx, op = ior_optab;
else
- return 0; /* FAIL */
+ return false;
}
else if (CONST_INT_P (operands[3]))
{
else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
operands[2] = const0_rtx, op = ior_optab;
else
- return 0; /* FAIL */
+ return false;
}
else
- return 0; /* FAIL */
+ return false;
orig_out = operands[0];
tmp = gen_reg_rtx (mode);
/* Recurse to get the constant loaded. */
if (ix86_expand_int_movcc (operands) == 0)
- return 0; /* FAIL */
+ return false;
/* Mask in the interesting variable. */
out = expand_binop (mode, op, var, tmp, orig_out, 0,
if (!rtx_equal_p (out, orig_out))
emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
- return 1; /* DONE */
+ return true;
}
/*
gen_rtx_IF_THEN_ELSE (mode,
compare_op, operands[2],
operands[3])));
-
- return 1; /* DONE */
+ return true;
}
/* Swap, force into registers, or otherwise massage the two operands
/* Expand a floating-point conditional move.  OPERANDS[0] is the
   destination, OPERANDS[1] the comparison, and OPERANDS[2] / OPERANDS[3]
   the two arms of the IF_THEN_ELSE emitted at the end.  Return true if
   successful; the SSE path returns false when the comparison mode differs
   from the move mode or when the compare arguments cannot be prepared.  */
-int
+bool
ix86_expand_fp_movcc (rtx operands[])
{
enum machine_mode mode = GET_MODE (operands[0]);
enum rtx_code code = GET_CODE (operands[1]);
rtx tmp, compare_op;
+ rtx op0 = XEXP (operands[1], 0);
+ rtx op1 = XEXP (operands[1], 1);
- ix86_compare_op0 = XEXP (operands[1], 0);
- ix86_compare_op1 = XEXP (operands[1], 1);
if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
{
enum machine_mode cmode;
/* Since we've no cmove for sse registers, don't force bad register
allocation just to gain access to it. Deny movcc when the
comparison mode doesn't match the move mode. */
- cmode = GET_MODE (ix86_compare_op0);
+ cmode = GET_MODE (op0);
/* Fall back to the second operand's mode when the first has none.  */
if (cmode == VOIDmode)
- cmode = GET_MODE (ix86_compare_op1);
+ cmode = GET_MODE (op1);
if (cmode != mode)
- return 0;
+ return false;
- code = ix86_prepare_sse_fp_compare_args (operands[0], code,
- &ix86_compare_op0,
- &ix86_compare_op1);
+ code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
/* The compare operands are passed by address, so the helper may have
   replaced them; UNKNOWN means the comparison cannot be handled here.  */
if (code == UNKNOWN)
- return 0;
+ return false;
- if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
- ix86_compare_op1, operands[2],
- operands[3]))
- return 1;
+ if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
+ operands[2], operands[3]))
+ return true;
- tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
- ix86_compare_op1, operands[2], operands[3]);
+ tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
+ operands[2], operands[3]);
/* Not a min/max: use the result of ix86_expand_sse_cmp to select between
   the two arms.  */
ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
- return 1;
+ return true;
}
/* The floating point conditional move instructions don't directly
support conditions resulting from a signed integer comparison. */
- compare_op = ix86_expand_compare (code);
+ compare_op = ix86_expand_compare (code, op0, op1);
/* If the comparison is not one fcmov accepts, materialize it into a
   QImode register with setcc and test that register against zero.  */
if (!fcmov_comparison_operator (compare_op, VOIDmode))
{
tmp = gen_reg_rtx (QImode);
- ix86_expand_setcc (code, tmp);
- code = NE;
- ix86_compare_op0 = tmp;
- ix86_compare_op1 = const0_rtx;
- compare_op = ix86_expand_compare (code);
+ ix86_expand_setcc (tmp, code, op0, op1);
+
+ compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
}
/* OPERANDS[0] = COMPARE_OP ? OPERANDS[2] : OPERANDS[3].  */
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
gen_rtx_IF_THEN_ELSE (mode, compare_op,
operands[2], operands[3])));
- return 1;
+ return true;
}
/* Expand a floating-point vector conditional move; a vcond operation
if (unsigned_p)
unpack = gen_sse4_1_zero_extendv8qiv8hi2;
else
- unpack = gen_sse4_1_extendv8qiv8hi2;
+ unpack = gen_sse4_1_sign_extendv8qiv8hi2;
break;
case V8HImode:
if (unsigned_p)
unpack = gen_sse4_1_zero_extendv4hiv4si2;
else
- unpack = gen_sse4_1_extendv4hiv4si2;
+ unpack = gen_sse4_1_sign_extendv4hiv4si2;
break;
case V4SImode:
if (unsigned_p)
unpack = gen_sse4_1_zero_extendv2siv2di2;
else
- unpack = gen_sse4_1_extendv2siv2di2;
+ unpack = gen_sse4_1_sign_extendv2siv2di2;
break;
default:
gcc_unreachable ();
/* Expand conditional increment or decrement using adc/sbb instructions.
The default case using setcc followed by the conditional move can be
done by generic code. */
-int
+bool
ix86_expand_int_addcc (rtx operands[])
{
enum rtx_code code = GET_CODE (operands[1]);
rtx val = const0_rtx;
bool fpcmp = false;
enum machine_mode mode;
+ rtx op0 = XEXP (operands[1], 0);
+ rtx op1 = XEXP (operands[1], 1);
- ix86_compare_op0 = XEXP (operands[1], 0);
- ix86_compare_op1 = XEXP (operands[1], 1);
if (operands[3] != const1_rtx
&& operands[3] != constm1_rtx)
- return 0;
- if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
- ix86_compare_op1, &compare_op))
- return 0;
+ return false;
+ if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
+ return false;
code = GET_CODE (compare_op);
flags = XEXP (compare_op, 0);
}
emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
- return 1; /* DONE */
+ return true;
}
&& alg != rep_prefix_4_byte \
&& alg != rep_prefix_8_byte))
const struct processor_costs *cost;
-
+
/* Even if the string operation call is cold, we still might spend a lot
of time processing large blocks. */
if (optimize_function_for_size_p (cfun)
4) Epilogue: code copying tail of the block that is too small to be
handled by main body (or up to size guarded by prologue guard). */
-int
+bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
rtx expected_align_exp, rtx expected_size_exp)
{
/* Make sure we don't need to care about overflow later on. */
if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
- return 0;
+ return false;
/* Step 0: Decide on preferred algorithm, desired alignment and
size of chunks to be copied by main loop. */
align = desired_align;
if (alg == libcall)
- return 0;
+ return false;
gcc_assert (alg != no_stringop);
if (!count)
count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
epilogue_size_needed);
if (jump_around_label)
emit_label (jump_around_label);
- return 1;
+ return true;
}
/* Helper function for memcpy. For QImode value 0xXY produce
if (mode == SImode)
emit_insn (gen_movsi_insv_1 (reg, reg));
else
- emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
+ emit_insn (gen_movdi_insv_1 (reg, reg));
else
{
tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
/* Expand string clear operation (bzero). Use i386 string operations when
profitable. See expand_movmem comment for explanation of individual
steps performed. */
-int
+bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
rtx expected_align_exp, rtx expected_size_exp)
{
/* Make sure we don't need to care about overflow later on. */
if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
- return 0;
+ return false;
/* Step 0: Decide on preferred algorithm, desired alignment and
size of chunks to be copied by main loop. */
align = desired_align;
if (alg == libcall)
- return 0;
+ return false;
gcc_assert (alg != no_stringop);
if (!count)
count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
}
if (jump_around_label)
emit_label (jump_around_label);
- return 1;
+ return true;
}
/* Expand the appropriate insns for doing strlen if not just doing
QImode, 1, end_0_label);
/* Increment the address. */
- emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
+ emit_insn (ix86_gen_add3 (out, out, const1_rtx));
/* Not needed with an alignment of 2 */
if (align != 2)
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
end_0_label);
- emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
+ emit_insn (ix86_gen_add3 (out, out, const1_rtx));
emit_label (align_3_label);
}
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
end_0_label);
- emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
+ emit_insn (ix86_gen_add3 (out, out, const1_rtx));
}
/* Generate loop to check 4 bytes at a time. It is not a good idea to
mem = change_address (src, SImode, out);
emit_move_insn (scratch, mem);
- emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
+ emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
This saves three branches inside loop and many cycles. */
/* Not in the first two. Move two bytes forward. */
emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
- emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
+ emit_insn (ix86_gen_add3 (out, out, const2_rtx));
emit_label (end_2_label);
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
tmp = gen_rtx_REG (CCmode, FLAGS_REG);
cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
- emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), tmp, cmp));
+ emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
emit_label (end_0_label);
}
/* Expand strlen. */
-int
+bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
rtx addr, scratch1, scratch2, scratch3, scratch4;
&& !TARGET_INLINE_ALL_STRINGOPS
&& !optimize_insn_for_size_p ()
&& (!CONST_INT_P (align) || INTVAL (align) < 4))
- return 0;
+ return false;
addr = force_reg (Pmode, XEXP (src, 0));
scratch1 = gen_reg_rtx (Pmode);
/* strlensi_unroll_1 returns the address of the zero at the end of
the string, like memchr(), so compute the length by subtracting
the start address. */
- emit_insn ((*ix86_gen_sub3) (out, out, addr));
+ emit_insn (ix86_gen_sub3 (out, out, addr));
}
else
{
unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
scratch4), UNSPEC_SCAS);
emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
- emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
- emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
+ emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
+ emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
}
- return 1;
+ return true;
}
/* For given symbol (function) construct code to compute address of its PLT
}
if (ix86_cmodel == CM_LARGE_PIC
- && MEM_P (fnaddr)
+ && MEM_P (fnaddr)
&& GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
&& !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
{
struct machine_function *f;
- f = GGC_CNEW (struct machine_function);
+ f = ggc_alloc_cleared_machine_function ();
f->use_fast_prologue_epilogue_nregs = -1;
f->tls_descriptor_call_expanded_p = 0;
f->call_abi = ix86_abi;
if (s->mode == mode && s->n == n)
return copy_rtx (s->rtl);
- s = (struct stack_local_entry *)
- ggc_alloc (sizeof (struct stack_local_entry));
+ s = ggc_alloc_stack_local_entry ();
s->n = n;
s->mode = mode;
s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
- FNDECL is the decl of the target address; M_TRAMP is a MEM for
+ FNDECL is the decl of the target address; M_TRAMP is a MEM for
the trampoline, and CHAIN_VALUE is an RTX for the static chain
to be passed to the target function. */
IX86_BUILTIN_CLZS,
+ /* FSGSBASE instructions. */
+ IX86_BUILTIN_RDFSBASE32,
+ IX86_BUILTIN_RDFSBASE64,
+ IX86_BUILTIN_RDGSBASE32,
+ IX86_BUILTIN_RDGSBASE64,
+ IX86_BUILTIN_WRFSBASE32,
+ IX86_BUILTIN_WRFSBASE64,
+ IX86_BUILTIN_WRGSBASE32,
+ IX86_BUILTIN_WRGSBASE64,
+
+ /* RDRND instructions. */
+ IX86_BUILTIN_RDRAND16,
+ IX86_BUILTIN_RDRAND32,
+ IX86_BUILTIN_RDRAND64,
+
+ /* F16C instructions. */
+ IX86_BUILTIN_CVTPH2PS,
+ IX86_BUILTIN_CVTPH2PS256,
+ IX86_BUILTIN_CVTPS2PH,
+ IX86_BUILTIN_CVTPS2PH256,
+
IX86_BUILTIN_MAX
};
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
+ /* FSGSBASE */
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
+
+ /* RDRND */
+ { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
+ { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
+ { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};
/* Builtins with variable number of arguments. */
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
- { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
- { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
- { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
- { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
- { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
- { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
{ OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
+
+ /* F16C */
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};
/* FMA4 and XOP. */
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
-
+
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
-
+
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
TYPE_PRECISION (float80_type_node) = 80;
layout_type (float80_type_node);
}
- (*lang_hooks.types.register_builtin_type) (float80_type_node, "__float80");
+ lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
/* The __float128 type. */
float128_type_node = make_node (REAL_TYPE);
TYPE_PRECISION (float128_type_node) = 128;
layout_type (float128_type_node);
- (*lang_hooks.types.register_builtin_type) (float128_type_node, "__float128");
+ lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
/* This macro is built by i386-builtin-types.awk. */
DEFINE_BUILTIN_PRIMITIVE_TYPES;
if (optimize || !target
|| GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ || !insn_data[icode].operand[0].predicate (target, tmode))
target = gen_reg_rtx (tmode);
if (GET_MODE (op1) == SImode && mode1 == TImode)
op1 = gen_lowpart (TImode, x);
}
- if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
+ if (!insn_data[icode].operand[1].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
- if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
+ if (!insn_data[icode].operand[2].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
pat = GEN_FCN (icode) (target, op0, op1);
if (optimize || !target
|| GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ || !insn_data[icode].operand[0].predicate (target, tmode))
target = gen_reg_rtx (tmode);
gcc_assert (nargs <= 4);
gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
if (optimize
- || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
+ || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
|| num_memory > 1)
op = force_reg (mode, op);
}
if (optimize || !target
|| GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ || !insn_data[icode].operand[0].predicate (target, tmode))
target = gen_reg_rtx (tmode);
if (VECTOR_MODE_P (mode0))
op0 = safe_vector_operand (op0, mode0);
if ((optimize && !register_operand (op0, mode0))
- || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ || !insn_data[icode].operand[1].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
op1 = op0;
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
+ if (!insn_data[icode].operand[2].predicate (op1, mode0))
op1 = copy_to_mode_reg (mode0, op1);
pat = GEN_FCN (icode) (target, op0, op1);
if (optimize || !target
|| GET_MODE (target) != tmode
- || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
+ || !insn_data[d->icode].operand[0].predicate (target, tmode))
target = gen_reg_rtx (tmode);
if ((optimize && !register_operand (op0, mode0))
- || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
+ || !insn_data[d->icode].operand[1].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
if ((optimize && !register_operand (op1, mode1))
- || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
+ || !insn_data[d->icode].operand[2].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
target = gen_rtx_SUBREG (QImode, target, 0);
if ((optimize && !register_operand (op0, mode0))
- || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ || !insn_data[d->icode].operand[0].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
if ((optimize && !register_operand (op1, mode1))
- || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ || !insn_data[d->icode].operand[1].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
pat = GEN_FCN (d->icode) (op0, op1);
target = gen_rtx_SUBREG (QImode, target, 0);
if ((optimize && !register_operand (op0, mode0))
- || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ || !insn_data[d->icode].operand[0].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
if ((optimize && !register_operand (op1, mode1))
- || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ || !insn_data[d->icode].operand[1].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
pat = GEN_FCN (d->icode) (op0, op1);
if (VECTOR_MODE_P (modev4))
op2 = safe_vector_operand (op2, modev4);
- if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
op0 = copy_to_mode_reg (modev2, op0);
- if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
+ if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
op1 = copy_to_mode_reg (modei3, op1);
if ((optimize && !register_operand (op2, modev4))
- || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
+ || !insn_data[d->icode].operand[4].predicate (op2, modev4))
op2 = copy_to_mode_reg (modev4, op2);
- if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
+ if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
op3 = copy_to_mode_reg (modei5, op3);
- if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
+ if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
{
error ("the fifth argument must be a 8-bit immediate");
return const0_rtx;
{
if (optimize || !target
|| GET_MODE (target) != tmode0
- || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ || !insn_data[d->icode].operand[0].predicate (target, tmode0))
target = gen_reg_rtx (tmode0);
scratch1 = gen_reg_rtx (tmode1);
{
if (optimize || !target
|| GET_MODE (target) != tmode1
- || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ || !insn_data[d->icode].operand[1].predicate (target, tmode1))
target = gen_reg_rtx (tmode1);
scratch0 = gen_reg_rtx (tmode0);
if (VECTOR_MODE_P (modev3))
op1 = safe_vector_operand (op1, modev3);
- if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
op0 = copy_to_mode_reg (modev2, op0);
if ((optimize && !register_operand (op1, modev3))
- || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
+ || !insn_data[d->icode].operand[3].predicate (op1, modev3))
op1 = copy_to_mode_reg (modev3, op1);
- if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
+ if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
{
error ("the third argument must be a 8-bit immediate");
return const0_rtx;
{
if (optimize || !target
|| GET_MODE (target) != tmode0
- || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ || !insn_data[d->icode].operand[0].predicate (target, tmode0))
target = gen_reg_rtx (tmode0);
scratch1 = gen_reg_rtx (tmode1);
{
if (optimize || !target
|| GET_MODE (target) != tmode1
- || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ || !insn_data[d->icode].operand[1].predicate (target, tmode1))
target = gen_reg_rtx (tmode1);
scratch0 = gen_reg_rtx (tmode0);
} args[4];
bool last_arg_count = false;
enum insn_code icode = d->icode;
- const struct insn_data *insn_p = &insn_data[icode];
+ const struct insn_data_d *insn_p = &insn_data[icode];
enum machine_mode tmode = insn_p->operand[0].mode;
enum machine_mode rmode = VOIDmode;
bool swap = false;
case V8SF_FTYPE_V8SF:
case V8SF_FTYPE_V8SI:
case V8SF_FTYPE_V4SF:
+ case V8SF_FTYPE_V8HI:
case V4SI_FTYPE_V4SI:
case V4SI_FTYPE_V16QI:
case V4SI_FTYPE_V4SF:
case V4SF_FTYPE_V4SI:
case V4SF_FTYPE_V8SF:
case V4SF_FTYPE_V4DF:
+ case V4SF_FTYPE_V8HI:
case V4SF_FTYPE_V2DF:
case V2DI_FTYPE_V2DI:
case V2DI_FTYPE_V16QI:
nargs_constant = 1;
break;
case V8HI_FTYPE_V8HI_INT:
+ case V8HI_FTYPE_V8SF_INT:
+ case V8HI_FTYPE_V4SF_INT:
case V8SF_FTYPE_V8SF_INT:
case V4SI_FTYPE_V4SI_INT:
case V4SI_FTYPE_V8SI_INT:
case V8HI_FTYPE_V8HI_V8HI_INT:
case V8SI_FTYPE_V8SI_V8SI_INT:
case V8SI_FTYPE_V8SI_V4SI_INT:
- case V8SF_FTYPE_V8SF_V8SF_INT:
- case V8SF_FTYPE_V8SF_V4SF_INT:
+ case V8SF_FTYPE_V8SF_V8SF_INT:
+ case V8SF_FTYPE_V8SF_V4SF_INT:
case V4SI_FTYPE_V4SI_V4SI_INT:
case V4DF_FTYPE_V4DF_V4DF_INT:
case V4DF_FTYPE_V4DF_V2DF_INT:
if (optimize
|| target == 0
|| GET_MODE (target) != tmode
- || ! (*insn_p->operand[0].predicate) (target, tmode))
+ || !insn_p->operand[0].predicate (target, tmode))
target = gen_reg_rtx (tmode);
real_target = target;
}
tree arg = CALL_EXPR_ARG (exp, i);
rtx op = expand_normal (arg);
enum machine_mode mode = insn_p->operand[i + 1].mode;
- bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
+ bool match = insn_p->operand[i + 1].predicate (op, mode);
if (last_arg_count && (i + 1) == nargs)
{
if (!match)
{
op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
- if (!(*insn_p->operand[i + 1].predicate) (op, mode))
+ if (!insn_p->operand[i + 1].predicate (op, mode))
op = copy_to_reg (op);
}
}
} args[3];
enum insn_code icode = d->icode;
bool last_arg_constant = false;
- const struct insn_data *insn_p = &insn_data[icode];
+ const struct insn_data_d *insn_p = &insn_data[icode];
enum machine_mode tmode = insn_p->operand[0].mode;
enum { load, store } klass;
case VOID_FTYPE_VOID:
emit_insn (GEN_FCN (icode) (target));
return 0;
+ case VOID_FTYPE_UINT64:
+ case VOID_FTYPE_UNSIGNED:
+ nargs = 0;
+ klass = store;
+ memory = 0;
+ break;
+ break;
case UINT64_FTYPE_VOID:
+ case UNSIGNED_FTYPE_VOID:
+ case UINT16_FTYPE_VOID:
nargs = 0;
klass = load;
memory = 0;
arg = CALL_EXPR_ARG (exp, 0);
op = expand_normal (arg);
gcc_assert (target == 0);
- target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
+ if (memory)
+ target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
+ else
+ target = force_reg (tmode, op);
arg_adjust = 1;
}
else
if (optimize
|| target == 0
|| GET_MODE (target) != tmode
- || ! (*insn_p->operand[0].predicate) (target, tmode))
+ || !insn_p->operand[0].predicate (target, tmode))
target = gen_reg_rtx (tmode);
}
arg = CALL_EXPR_ARG (exp, i + arg_adjust);
op = expand_normal (arg);
- match = (*insn_p->operand[i + 1].predicate) (op, mode);
+ match = insn_p->operand[i + 1].predicate (op, mode);
if (last_arg_constant && (i + 1) == nargs)
{
op0 = force_reg (Pmode, op0);
op0 = gen_rtx_MEM (mode1, op0);
- if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ if (!insn_data[icode].operand[0].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
+ if (!insn_data[icode].operand[1].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
- if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
+ if (!insn_data[icode].operand[2].predicate (op2, mode2))
op2 = copy_to_mode_reg (mode2, op2);
pat = GEN_FCN (icode) (op0, op1, op2);
if (! pat)
arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_normal (arg0);
icode = CODE_FOR_sse2_clflush;
- if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
+ if (!insn_data[icode].operand[0].predicate (op0, Pmode))
op0 = copy_to_mode_reg (Pmode, op0);
emit_insn (gen_sse2_clflush (op0));
op1 = copy_to_mode_reg (SImode, op1);
if (!REG_P (op2))
op2 = copy_to_mode_reg (SImode, op2);
- emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
+ emit_insn (ix86_gen_monitor (op0, op1, op2));
return 0;
case IX86_BUILTIN_MWAIT:
arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_normal (arg0);
icode = CODE_FOR_lwp_llwpcb;
- if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
+ if (!insn_data[icode].operand[0].predicate (op0, Pmode))
op0 = copy_to_mode_reg (Pmode, op0);
emit_insn (gen_lwp_llwpcb (op0));
return 0;
case IX86_BUILTIN_SLWPCB:
icode = CODE_FOR_lwp_slwpcb;
if (!target
- || ! (*insn_data[icode].operand[0].predicate) (target, Pmode))
+ || !insn_data[icode].operand[0].predicate (target, Pmode))
target = gen_reg_rtx (Pmode);
emit_insn (gen_lwp_slwpcb (target));
return target;
/* Dispatch to a handler for a vectorization library. */
if (ix86_veclib_handler)
- return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
- type_in);
+ return ix86_veclib_handler ((enum built_in_function) fn, type_out,
+ type_in);
return NULL_TREE;
}
rtx result;
gcc_assert (reload_completed);
- if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
+ if (ix86_using_red_zone ())
{
result = gen_rtx_MEM (mode,
gen_rtx_PLUS (Pmode,
GEN_INT (-RED_ZONE_SIZE)));
emit_move_insn (result, operand);
}
- else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
+ else if (TARGET_64BIT)
{
switch (mode)
{
void
ix86_free_from_memory (enum machine_mode mode)
{
- if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
+ if (!ix86_using_red_zone ())
{
int size;
/* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
same. */
-static const enum reg_class *
+static const reg_class_t *
i386_ira_cover_classes (void)
{
- static const enum reg_class sse_fpmath_classes[] = {
+ static const reg_class_t sse_fpmath_classes[] = {
GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
};
- static const enum reg_class no_sse_fpmath_classes[] = {
+ static const reg_class_t no_sse_fpmath_classes[] = {
GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
};
return regclass;
}
-static enum reg_class
-ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
+static reg_class_t
+ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
enum machine_mode mode,
secondary_reload_info *sri ATTRIBUTE_UNUSED)
{
return NO_REGS;
}
+/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  Return true for the
+   register classes enumerated in the switch below and false for
+   every other class.  */
+
+static bool
+ix86_class_likely_spilled_p (reg_class_t rclass)
+{
+  switch (rclass)
+    {
+    default:
+      return false;
+
+    case AREG:
+    case DREG:
+    case CREG:
+    case BREG:
+    case AD_REGS:
+    case SIREG:
+    case DIREG:
+    case SSE_FIRST_REG:
+    case FP_TOP_REG:
+    case FP_SECOND_REG:
+      return true;
+    }
+}
+
/* If we are copying between general and FP registers, we need a memory
location. The same is true for SSE and MMX registers.
When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
enforce these sanity checks. */
-static inline int
+static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
- enum machine_mode mode, int strict)
+ enum machine_mode mode, int strict)
{
if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
|| MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
return false;
}
-int
+bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
enum machine_mode mode, int strict)
{
}
}
-int
-ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
+static int
+ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
+ bool in)
{
- return inline_memory_move_cost (mode, regclass, in);
+ return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
on some machines it is expensive to move between registers if they are not
general registers. */
-int
-ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
- enum reg_class class2)
+static int
+ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
+ reg_class_t class2_i)
{
+ enum reg_class class1 = (enum reg_class) class1_i;
+ enum reg_class class2 = (enum reg_class) class2_i;
+
/* In case we require secondary memory, compute cost of the store followed
by load. In order to avoid bad register allocation choices, we need
for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
gcc_assert (!TARGET_64BIT);
/* Lose our funky encoding stuff so it doesn't contaminate the stub. */
- symb = (*targetm.strip_name_encoding) (symb);
+ symb = targetm.strip_name_encoding (symb);
length = strlen (stub);
binder_name = XALLOCAVEC (char, length + 32);
warning (OPT_Wattributes, "%qE attribute only applies to functions",
name);
*no_add_attrs = true;
- return NULL_TREE;
- }
-
- if (TARGET_64BIT)
- {
- warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
- name);
- return NULL_TREE;
}
-
-#ifndef HAVE_AS_IX86_SWAP
- sorry ("ms_hook_prologue attribute needs assembler swap suffix support");
-#endif
-
- return NULL_TREE;
+ return NULL_TREE;
}
static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
- return (TARGET_MS_BITFIELD_LAYOUT &&
- !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
- || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
+ return ((TARGET_MS_BITFIELD_LAYOUT
+ && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
+ || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
return false;
/* Need a free register for GOT references. */
- if (flag_pic && !(*targetm.binds_local_p) (function))
+ if (flag_pic && !targetm.binds_local_p (function))
return false;
/* Otherwise ok. */
xops[0] = XEXP (DECL_RTL (function), 0);
if (TARGET_64BIT)
{
- if (!flag_pic || (*targetm.binds_local_p) (function))
+ if (!flag_pic || targetm.binds_local_p (function))
output_asm_insn ("jmp\t%P0", xops);
/* All thunks should be in the same object as their target,
and thus binds_local_p should be true. */
}
else
{
- if (!flag_pic || (*targetm.binds_local_p) (function))
+ if (!flag_pic || targetm.binds_local_p (function))
output_asm_insn ("jmp\t%P0", xops);
else
#if TARGET_MACHO
if (TARGET_MACHO)
{
rtx sym_ref = XEXP (DECL_RTL (function), 0);
- tmp = (gen_rtx_SYMBOL_REF
+ if (TARGET_MACHO_BRANCH_ISLANDS)
+ sym_ref = (gen_rtx_SYMBOL_REF
(Pmode,
machopic_indirection_name (sym_ref, /*stub_p=*/true)));
- tmp = gen_rtx_MEM (QImode, tmp);
+ tmp = gen_rtx_MEM (QImode, sym_ref);
xops[0] = tmp;
output_asm_insn ("jmp\t%0", xops);
}
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
+ const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
+ : MCOUNT_NAME);
+
if (TARGET_64BIT)
{
#ifndef NO_PROFILE_COUNTERS
- fprintf (file, "\tleaq\t" LPREFIX "P%d(%%rip),%%r11\n", labelno);
+ fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif
if (DEFAULT_ABI == SYSV_ABI && flag_pic)
- fputs ("\tcall\t*" MCOUNT_NAME "@GOTPCREL(%rip)\n", file);
+ fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
else
- fputs ("\tcall\t" MCOUNT_NAME "\n", file);
+ fprintf (file, "\tcall\t%s\n", mcount_name);
}
else if (flag_pic)
{
#ifndef NO_PROFILE_COUNTERS
- fprintf (file, "\tleal\t" LPREFIX "P%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
- labelno);
+ fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
+ LPREFIX, labelno);
#endif
- fputs ("\tcall\t*" MCOUNT_NAME "@GOT(%ebx)\n", file);
+ fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
}
else
{
#ifndef NO_PROFILE_COUNTERS
- fprintf (file, "\tmovl\t$" LPREFIX "P%d,%%" PROFILE_COUNT_REGISTER "\n",
- labelno);
+ fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
+ LPREFIX, labelno);
#endif
- fputs ("\tcall\t" MCOUNT_NAME "\n", file);
+ fprintf (file, "\tcall\t%s\n", mcount_name);
}
}
emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
var = force_reg (GET_MODE_INNER (mode), var);
ix86_expand_vector_set (mmx_ok, target, var, one_var);
- return true;
+ return true;
}
switch (mode)
rtx (*gen_load_even) (rtx, rtx, rtx);
rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
-
+
switch (mode)
{
case V8HImode:
default:
gcc_unreachable ();
}
-
+
for (i = 0; i < n; i++)
{
/* Extend the odd elment to SImode using a paradoxical SUBREG. */
/* Cast the V4SImode vector back to a vector in orignal mode. */
op0 = gen_reg_rtx (mode);
emit_move_insn (op0, gen_lowpart (mode, op1));
-
+
/* Load even elements into the second positon. */
- emit_insn ((*gen_load_even) (op0,
- force_reg (inner_mode,
- ops [i + i + 1]),
- const1_rtx));
+ emit_insn (gen_load_even (op0,
+ force_reg (inner_mode,
+ ops [i + i + 1]),
+ const1_rtx));
/* Cast vector to FIRST_IMODE vector. */
ops[i] = gen_reg_rtx (first_imode);
for (i = j = 0; i < n; i += 2, j++)
{
op0 = gen_reg_rtx (first_imode);
- emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
+ emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
/* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
ops[j] = gen_reg_rtx (second_imode);
for (i = j = 0; i < n / 2; i += 2, j++)
{
op0 = gen_reg_rtx (second_imode);
- emit_insn ((*gen_interleave_second_low) (op0, ops[i],
- ops[i + 1]));
+ emit_insn (gen_interleave_second_low (op0, ops[i],
+ ops[i + 1]));
/* Cast the SECOND_IMODE vector to the THIRD_IMODE
vector. */
case V2DImode:
op0 = gen_reg_rtx (second_imode);
- emit_insn ((*gen_interleave_second_low) (op0, ops[0],
- ops[1]));
+ emit_insn (gen_interleave_second_low (op0, ops[0],
+ ops[1]));
/* Cast the SECOND_IMODE vector back to a vector on original
mode. */
break;
/* Don't use ix86_expand_vector_init_interleave if we can't
- move from GPR to SSE register directly. */
+ move from GPR to SSE register directly. */
if (!TARGET_INTER_UNIT_MOVES)
break;
/* Extract the half. */
tmp = gen_reg_rtx (half_mode);
- emit_insn ((*gen_extract[j][i]) (tmp, target));
+ emit_insn (gen_extract[j][i] (tmp, target));
/* Put val in tmp at elt. */
ix86_expand_vector_set (false, tmp, val, elt);
/* Put it back. */
- emit_insn ((*gen_insert[j][i]) (target, target, tmp));
+ emit_insn (gen_insert[j][i] (target, target, tmp));
return;
default:
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
-ix86_builtin_vectorization_cost (bool runtime_test)
+ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype ATTRIBUTE_UNUSED,
+ int misalign ATTRIBUTE_UNUSED)
{
- /* If the branch of the runtime test is taken - i.e. - the vectorized
- version is skipped - this incurs a misprediction cost (because the
- vectorized version is expected to be the fall-through). So we subtract
- the latency of a mispredicted branch from the costs that are incured
- when the vectorized version is executed.
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ return ix86_cost->scalar_stmt_cost;
- TODO: The values in individual target tables have to be tuned or new
- fields may be needed. For eg. on K8, the default branch path is the
- not-taken path. If the taken path is predicted correctly, the minimum
- penalty of going down the taken-path is 1 cycle. If the taken-path is
- not predicted correctly, then the minimum penalty is 10 cycles. */
+ case scalar_load:
+ return ix86_cost->scalar_load_cost;
- if (runtime_test)
- {
- return (-(ix86_cost->cond_taken_branch_cost));
+ case scalar_store:
+ return ix86_cost->scalar_store_cost;
+
+ case vector_stmt:
+ return ix86_cost->vec_stmt_cost;
+
+ case vector_load:
+ return ix86_cost->vec_align_load_cost;
+
+ case vector_store:
+ return ix86_cost->vec_store_cost;
+
+ case vec_to_scalar:
+ return ix86_cost->vec_to_scalar_cost;
+
+ case scalar_to_vec:
+ return ix86_cost->scalar_to_vec_cost;
+
+ case unaligned_load:
+ case unaligned_store:
+ return ix86_cost->vec_unalign_load_cost;
+
+ case cond_branch_taken:
+ return ix86_cost->cond_taken_branch_cost;
+
+ case cond_branch_not_taken:
+ return ix86_cost->cond_not_taken_branch_cost;
+
+ case vec_perm:
+ return 1;
+
+ default:
+ gcc_unreachable ();
}
- else
- return 0;
}
+
/* Implement targetm.vectorize.builtin_vec_perm. */
static tree
tree itype = TREE_TYPE (vec_type);
bool u = TYPE_UNSIGNED (itype);
enum machine_mode vmode = TYPE_MODE (vec_type);
- enum ix86_builtins fcode = fcode; /* Silence bogus warning. */
+ enum ix86_builtins fcode;
bool ok = TARGET_SSE2;
switch (vmode)
nelt = d->nelt;
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
-
+
/* Generate two permutation masks. If the required element is within
the given vector it is shuffled into the proper lane. If the required
element is in the other vector, force a zero into the lane by setting
d.op1 = d.op0;
break;
}
-
+
d.target = gen_reg_rtx (d.vmode);
if (ix86_expand_vec_perm_builtin_1 (&d))
return d.target;
an error generated from the extract. */
gcc_assert (vec_mask > 0 && vec_mask <= 3);
one_vec = (vec_mask != 3);
-
+
/* Implementable with shufps or pshufd. */
if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
return true;
/* This function returns the calling abi specific va_list type node.
It returns the FNDECL specific va_list type. */
-tree
+static tree
ix86_fn_abi_va_list (tree fndecl)
{
if (!TARGET_64BIT)
/* Returns the canonical va_list type specified by TYPE. If there
is no valid TYPE provided, it return NULL_TREE. */
-tree
+static tree
ix86_canonical_va_list_type (tree type)
{
tree wtype, htype;
/* Resolve references and pointers to va_list type. */
- if (INDIRECT_REF_P (type))
+ if (TREE_CODE (type) == MEM_REF)
type = TREE_TYPE (type);
else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
type = TREE_TYPE (type);
+ else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
+ type = TREE_TYPE (type);
if (TARGET_64BIT)
{
}
/* Iterate through the target-specific builtin types for va_list.
- IDX denotes the iterator, *PTREE is set to the result type of
- the va_list builtin, and *PNAME to its internal type.
- Returns zero if there is no element for this index, otherwise
- IDX should be increased upon the next call.
- Note, do not iterate a base builtin's name like __builtin_va_list.
- Used from c_common_nodes_and_builtins. */
+ IDX denotes the iterator, *PTREE is set to the result type of
+ the va_list builtin, and *PNAME to its internal type.
+ Returns zero if there is no element for this index, otherwise
+ IDX should be increased upon the next call.
+ Note, do not iterate a base builtin's name like __builtin_va_list.
+ Used from c_common_nodes_and_builtins. */
-int
+static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
- if (!TARGET_64BIT)
+ if (TARGET_64BIT) /* Entries are only reported when TARGET_64BIT is set. */
+ {
+ switch (idx)
+ {
+ default:
+ break; /* Unknown index: fall out to the "no element" return. */
+
+ case 0:
+ *ptree = ms_va_list_type_node;
+ *pname = "__builtin_ms_va_list";
+ return 1;
+
+ case 1:
+ *ptree = sysv_va_list_type_node;
+ *pname = "__builtin_sysv_va_list";
+ return 1;
+ }
+ }
+
+ return 0; /* No element for IDX; *PTREE and *PNAME are left untouched. */
+}
+
+#undef TARGET_SCHED_DISPATCH
+#define TARGET_SCHED_DISPATCH has_dispatch
+#undef TARGET_SCHED_DISPATCH_DO
+#define TARGET_SCHED_DISPATCH_DO do_dispatch
+
+/* The size of the dispatch window is the total number of bytes of
+ object code allowed in a window. */
+#define DISPATCH_WINDOW_SIZE 16
+
+/* Number of dispatch windows considered for scheduling. */
+#define MAX_DISPATCH_WINDOWS 3
+
+/* Maximum number of instructions in a window. It is also the number
+ of window entries reset by init_window and the uop bound tested in
+ fits_dispatch_window. */
+#define MAX_INSN 4
+
+/* Maximum number of immediate operands in a window. */
+#define MAX_IMM 4
+
+/* Maximum number of immediate bits allowed in a window.
+ NOTE(review): count_num_restricted compares this limit against
+ imm_size, which get_num_immediates computes as 4 per 32 bit and 8 per
+ 64 bit immediate, i.e. apparently a byte count rather than a bit
+ count. Confirm the intended unit. */
+#define MAX_IMM_SIZE 128
+
+/* Maximum number of 32 bit immediates allowed in a window. */
+#define MAX_IMM_32 4
+
+/* Maximum number of 64 bit immediates allowed in a window. */
+#define MAX_IMM_64 2
+
+/* Maximum total of loads or prefetches allowed in a window. */
+#define MAX_LOAD 2
+
+/* Maximum total of stores allowed in a window. */
+#define MAX_STORE 1
+
+#undef BIG
+#define BIG 100 /* "Too many" marker: returned by count_num_restricted and used as an unlimited entry in num_allowable_groups. */
+
+
+/* Dispatch groups. Instructions that affect the mix in a dispatch
+ window. The enumerators index num_allowable_groups and group_name;
+ disp_last is not a group but the number of groups, used to size
+ those tables. */
+enum dispatch_group {
+ disp_no_group = 0,
+ disp_load,
+ disp_store,
+ disp_load_store,
+ disp_prefetch,
+ disp_imm,
+ disp_imm_32,
+ disp_imm_64,
+ disp_branch,
+ disp_cmp,
+ disp_jcc,
+ disp_last
+};
+
+/* Number of allowable groups in a dispatch window. It is an array
+ indexed by dispatch_group enum, so the entries below are, in order:
+ no_group, load, store, load_store, prefetch, imm, imm_32, imm_64,
+ branch, cmp and jcc. 100 (BIG) is used as a big number, because
+ the number of these kinds of operations does not have any effect
+ in a dispatch window, but we need them for other reasons in the
+ table. */
+static unsigned int num_allowable_groups[disp_last] = {
+ 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
+};
+
+char group_name[disp_last + 1][16] = { /* Names of the dispatch groups, indexed by enum dispatch_group (disp_last included); 16 bytes holds the longest name, "disp_load_store", plus its NUL. */
+ "disp_no_group", "disp_load", "disp_store", "disp_load_store",
+ "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
+ "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
+};
+
+/* Instruction path, as classified by get_insn_path. */
+enum insn_path {
+ no_path = 0, /* Value given to empty window entries by init_window. */
+ path_single, /* Single micro op. */
+ path_double, /* Double micro op. */
+ path_multi, /* Instructions with more than 2 micro ops. */
+ last_path
+};
+
+/* sched_insn_info describes one instruction scheduled into a dispatch
+ window: the insn itself plus the properties recorded for it by
+ add_insn_window.
+
+ An array of these entries is allocated for each dispatch window
+ (see allocate_window). */
+typedef struct sched_insn_info_s {
+ rtx insn; /* The scheduled instruction; NULL for an empty entry. */
+ enum dispatch_group group; /* Result of get_insn_group for INSN. */
+ enum insn_path path; /* Result of get_insn_path for INSN. */
+ int byte_len; /* Result of min_insn_size for INSN. */
+ int imm_bytes; /* Size returned by get_num_immediates for INSN. */
+} sched_insn_info;
+
+/* Linked list of dispatch windows. This is a two way list of
+ dispatch windows of a basic block. It contains information about
+ the number of uops in the window and the total number of
+ instructions and of bytes in the object code for this dispatch
+ window. All counters are running totals that add_insn_window
+ updates and init_window clears. */
+typedef struct dispatch_windows_s {
+ int num_insn; /* Number of insn in the window. */
+ int num_uops; /* Number of uops in the window. */
+ int window_size; /* Number of bytes in the window. */
+ int window_num; /* Window number, either 0 or 1. */
+ int num_imm; /* Number of immediates in the window. */
+ int num_imm_32; /* Number of 32 bit immediates in the window. */
+ int num_imm_64; /* Number of 64 bit immediates in the window. */
+ int imm_size; /* Total size of immediates in the window. */
+ int num_loads; /* Total memory loads in the window. */
+ int num_stores; /* Total memory stores in the window. */
+ int violation; /* Violation exists in window. */
+ sched_insn_info *window; /* Pointer to the array of window entries. */
+ struct dispatch_windows_s *next;
+ struct dispatch_windows_s *prev;
+} dispatch_windows;
+
+/* Immediate values used in an insn, as counted by find_constant_1. */
+typedef struct imm_info_s
+ {
+ int imm; /* Total number of immediates found. */
+ int imm32; /* Number of those counted as 32 bit immediates. */
+ int imm64; /* Number of those counted as 64 bit immediates. */
+ } imm_info;
+
+static dispatch_windows *dispatch_window_list; /* Window number 0 (see init_window). */
+static dispatch_windows *dispatch_window_list1; /* Window number 1; linked after window 0 by allocate_next_window. */
+
+/* Return the memory-related dispatch group of INSN, derived from its
+   "memory" attribute: disp_store, disp_load or disp_load_store.  Any
+   other attribute value, or an insn whose INSN_CODE is negative,
+   yields disp_no_group.  */
+
+static enum dispatch_group
+get_mem_group (rtx insn)
+{
+  /* The attribute is only queried for insns with a valid code.  */
+  if (INSN_CODE (insn) < 0)
+    return disp_no_group;
+
+  switch (get_attr_memory (insn))
+    {
+    case MEMORY_STORE:
+      return disp_store;
+
+    case MEMORY_LOAD:
+      return disp_load;
+
+    case MEMORY_BOTH:
+      return disp_load_store;
+
+    default:
+      return disp_no_group;
+    }
+}
+
+/* Return true if INSN is a compare instruction: its "type" attribute
+   is TYPE_TEST, TYPE_ICMP or TYPE_FCMP, or its pattern is a COMPARE.  */
+
+static bool
+is_cmp (rtx insn)
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_TEST:
+    case TYPE_ICMP:
+    case TYPE_FCMP:
+      return true;
+
+    default:
+      /* Not a compare by attribute; look at the pattern itself.  */
+      return GET_CODE (PATTERN (insn)) == COMPARE;
+    }
+}
+
+/* Return the violation flag of the most recent dispatch window: the
+   window linked after dispatch_window_list when there is one,
+   dispatch_window_list itself otherwise.  */
+
+static bool
+dispatch_violation (void)
+{
+  dispatch_windows *latest = dispatch_window_list->next;
+
+  if (!latest)
+    latest = dispatch_window_list;
+
+  return latest->violation;
+}
+
+/* Return true if INSN is treated as a branch for dispatch purposes,
+   i.e. it is either a call or a jump insn.  */
+
+static bool
+is_branch (rtx insn)
+{
+  if (CALL_P (insn))
+    return true;
+
+  if (JUMP_P (insn))
+    return true;
+
+  return false;
+}
+
+/* Return true if INSN is a prefetch: a non-jump insn whose pattern
+   is a PREFETCH.  */
+
+static bool
+is_prefetch (rtx insn)
+{
+  if (!NONJUMP_INSN_P (insn))
+    return false;
+
+  return GET_CODE (PATTERN (insn)) == PREFETCH;
+}
+
+/* Reset dispatch window WINDOW_NUM.  Zero selects dispatch_window_list,
+   any other value selects dispatch_window_list1.  All counters are
+   cleared, the window is unlinked from its neighbour and the first
+   MAX_INSN entries of its insn table are marked empty.  The window
+   must already have been allocated.  */
+
+static void
+init_window (int window_num)
+{
+  dispatch_windows *win
+    = window_num == 0 ? dispatch_window_list : dispatch_window_list1;
+  sched_insn_info *slot;
+
+  /* Identity and list linkage.  */
+  win->window_num = window_num;
+  win->next = NULL;
+  win->prev = NULL;
+  win->violation = false;
+
+  /* Running totals.  */
+  win->num_insn = 0;
+  win->num_uops = 0;
+  win->window_size = 0;
+  win->num_imm = 0;
+  win->num_imm_32 = 0;
+  win->num_imm_64 = 0;
+  win->imm_size = 0;
+  win->num_loads = 0;
+  win->num_stores = 0;
+
+  /* Per-insn entries.  */
+  for (slot = win->window; slot < win->window + MAX_INSN; slot++)
+    {
+      slot->insn = NULL;
+      slot->group = disp_no_group;
+      slot->path = no_path;
+      slot->byte_len = 0;
+      slot->imm_bytes = 0;
+    }
+}
+
+/* Allocate a dispatch window descriptor together with its table of
+   MAX_INSN + 1 insn entries and return it.  No other field is set
+   here; callers are expected to run init_window afterwards, as
+   init_dispatch_sched does.  */
+
+static dispatch_windows *
+allocate_window (void)
+{
+  dispatch_windows *fresh = XNEW (dispatch_windows);
+
+  fresh->window = XNEWVEC (sched_insn_info, MAX_INSN + 1);
+  return fresh;
+}
+
+/* Set up the dispatch scheduling state: allocate both dispatch
+   windows and reset each of them to the empty state.  */
+
+static void
+init_dispatch_sched (void)
+{
+  /* Window number 0.  */
+  dispatch_window_list = allocate_window ();
+  init_window (0);
+
+  /* Window number 1.  */
+  dispatch_window_list1 = allocate_window ();
+  init_window (1);
+}
+
+/* Return true if GROUP marks the end of a window, which here means
+   GROUP is disp_branch.  The end of a basic block does not have to be
+   a branch, but only branches are assumed to end a window.  */
+
+static bool
+is_end_basic_block (enum dispatch_group group)
+{
+  if (group != disp_branch)
+    return false;
+
+  return true;
+}
+
+/* Finish processing of the current windows: check that neither window
+   holds more than MAX_INSN insns and that together they do not exceed
+   48 bytes, then reset them.  Window 1 is only checked and reset when
+   it is linked after window 0.  */
+
+static void
+process_end_window (void)
+{
+  dispatch_windows *first = dispatch_window_list;
+
+  gcc_assert (first->num_insn <= MAX_INSN);
+
+  if (first->next != NULL)
+    {
+      dispatch_windows *second = dispatch_window_list1;
+
+      gcc_assert (second->num_insn <= MAX_INSN);
+      gcc_assert (first->window_size + second->window_size <= 48);
+      init_window (1);
+    }
+
+  init_window (0);
+}
+
+/* Make dispatch window WINDOW_NUM (0 or 1) the next window to fill and
+   return it.  Asking for window 0 resets window 0, and window 1 too
+   when it is currently linked.  Asking for window 1 links it after
+   window 0 without resetting anything.  At most two windows are used
+   for 48 bytes of instructions; note that these are not the dispatch
+   windows whose size is DISPATCH_WINDOW_SIZE.  */
+
+static dispatch_windows *
+allocate_next_window (int window_num)
+{
+  if (window_num != 0)
+    {
+      /* Chain window 1 behind window 0.  */
+      dispatch_window_list->next = dispatch_window_list1;
+      dispatch_window_list1->prev = dispatch_window_list;
+      return dispatch_window_list1;
+    }
+
+  /* Start over with window 0.  */
+  if (dispatch_window_list->next)
+    init_window (1);
+  init_window (0);
+
+  return dispatch_window_list;
+}
+
+/* Increment the number of immediate operands of an instruction.
+ Callback handed to for_each_rtx by find_constant: *IN_RTX is the
+ sub-expression being visited and IMM_VALUES the counters to update.
+ It always returns 0. */
+
+static int
+find_constant_1 (rtx *in_rtx, imm_info *imm_values)
+{
+ if (*in_rtx == 0)
return 0;
- switch (idx) {
- case 0:
- *ptree = ms_va_list_type_node;
- *pname = "__builtin_ms_va_list";
- break;
- case 1:
- *ptree = sysv_va_list_type_node;
- *pname = "__builtin_sysv_va_list";
- break;
- default:
+
+ switch ( GET_CODE (*in_rtx))
+ {
+ case CONST:
+ case SYMBOL_REF:
+ case CONST_INT:
+ (imm_values->imm)++;
+ if (x86_64_immediate_operand (*in_rtx, SImode)) /* Counted as 32 bit when this predicate accepts it, as 64 bit otherwise. */
+ (imm_values->imm32)++;
+ else
+ (imm_values->imm64)++;
+ break;
+
+ case CONST_DOUBLE: /* Always counted as a 64 bit immediate. */
+ (imm_values->imm)++;
+ (imm_values->imm64)++;
+ break;
+
+ case CODE_LABEL:
+ if (LABEL_KIND (*in_rtx) == LABEL_NORMAL) /* Only normal labels are counted, as 32 bit immediates. */
+ {
+ (imm_values->imm)++;
+ (imm_values->imm32)++;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/* Count the immediate operands of IN_RTX into IMM_VALUES by running
+   find_constant_1 over every sub-expression.  When IN_RTX is an insn
+   its pattern is walked, otherwise IN_RTX itself.  */
+
+static void
+find_constant (rtx in_rtx, imm_info *imm_values)
+{
+  rtx *root;
+
+  if (INSN_P (in_rtx))
+    root = &PATTERN (in_rtx);
+  else
+    root = &in_rtx;
+
+  for_each_rtx (root, (rtx_function) find_constant_1, (void *) imm_values);
+}
+
+/* Return the total size of the immediate operands of INSN, computed
+   as 4 for each 32 bit immediate plus 8 for each 64 bit immediate,
+   and report the individual counts through the pointer arguments.
+   The counters start at zero before FIND_CONSTANT is called.
+   INSN is the input instruction.  *IMM receives the total number of
+   immediates, *IMM32 the number of 32 bit immediates and *IMM64 the
+   number of 64 bit immediates.  */
+
+static int
+get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
+{
+  imm_info counts;
+
+  counts.imm = 0;
+  counts.imm32 = 0;
+  counts.imm64 = 0;
+
+  find_constant (insn, &counts);
+
+  *imm = counts.imm;
+  *imm32 = counts.imm32;
+  *imm64 = counts.imm64;
+
+  return 4 * counts.imm32 + 8 * counts.imm64;
+}
+
+/* Return true if INSN has an immediate operand, i.e. the immediate
+   size reported by get_num_immediates is nonzero.  A null INSN has
+   none.  */
+
+static bool
+has_immediate (rtx insn)
+{
+  int n_imm;
+  int n_imm32;
+  int n_imm64;
+
+  if (!insn)
+    return false;
+
+  /* Only the returned size matters; the counts are discarded.  */
+  return get_num_immediates (insn, &n_imm, &n_imm32, &n_imm64);
+}
+
+/* Return the decode path of INSN based on its amdfam10_decode
+   attribute: numeric value 0 maps to path_single, 1 to path_double
+   and anything else to path_multi.
+   NOTE(review): this relies on the numeric order of enum
+   attr_amdfam10_decode, which is not visible here; confirm that 0 and
+   1 really denote the single and double decode paths.  */
+
+static enum insn_path
+get_insn_path (rtx insn)
+{
+  switch ((int) get_attr_amdfam10_decode (insn))
+    {
+    case 0:
+      return path_single;
+
+    case 1:
+      return path_double;
+
+    default:
+      return path_multi;
+    }
+}
+
+/* Return the dispatch group of INSN.  A memory group reported by
+   get_mem_group takes precedence; otherwise the first match among
+   branch, compare, immediate and prefetch is used, and disp_no_group
+   when nothing matches.  */
+
+static enum dispatch_group
+get_insn_group (rtx insn)
+{
+  enum dispatch_group mem_group = get_mem_group (insn);
+
+  if (mem_group != disp_no_group)
+    return mem_group;
+
+  /* The tests are evaluated strictly in this order.  */
+  return (is_branch (insn) ? disp_branch
+	  : is_cmp (insn) ? disp_cmp
+	  : has_immediate (insn) ? disp_imm
+	  : is_prefetch (insn) ? disp_prefetch
+	  : disp_no_group);
+}
+
+/* Count number of GROUP restricted instructions in a dispatch
+ window WINDOW_LIST. The result is 0 when INSN belongs to no
+ dispatch group, BIG when adding INSN to WINDOW_LIST would exceed
+ one of the immediate, load or store limits tested below, and 1
+ otherwise. */
+
+static int
+count_num_restricted (rtx insn, dispatch_windows *window_list)
+{
+ enum dispatch_group group = get_insn_group (insn);
+ int imm_size;
+ int num_imm_operand;
+ int num_imm32_operand;
+ int num_imm64_operand;
+
+ if (group == disp_no_group)
return 0;
- }
+
+ if (group == disp_imm)
+ { /* Immediates: check total size and the per-kind counts; the *2 terms weigh one 64 bit immediate like two 32 bit ones. */
+ imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
+ &num_imm64_operand);
+ if (window_list->imm_size + imm_size > MAX_IMM_SIZE
+ || num_imm_operand + window_list->num_imm > MAX_IMM
+ || (num_imm32_operand > 0
+ && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
+ || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
+ || (num_imm64_operand > 0
+ && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
+ || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
+ || (window_list->imm_size + imm_size == MAX_IMM_SIZE
+ && num_imm64_operand > 0
+ && ((window_list->num_imm_64 > 0
+ && window_list->num_insn >= 2)
+ || window_list->num_insn >= 3)))
+ return BIG;
+
+ return 1;
+ }
+
+ if ((group == disp_load_store /* Needs both a free load and a free store slot. */
+ && (window_list->num_loads >= MAX_LOAD
+ || window_list->num_stores >= MAX_STORE))
+ || ((group == disp_load
+ || group == disp_prefetch) /* Prefetches are counted against the load limit. */
+ && window_list->num_loads >= MAX_LOAD)
+ || (group == disp_store
+ && window_list->num_stores >= MAX_STORE))
+ return BIG;
+
return 1;
}
+/* This function returns true if insn satisfies dispatch rules on the
+ last window scheduled. Compares and conditional jumps are always
+ rejected, unrestricted insns and branches always accepted; other
+ insns are checked against the limits of the most recent window. */
+
+static bool
+fits_dispatch_window (rtx insn)
+{
+ dispatch_windows *window_list = dispatch_window_list;
+ dispatch_windows *window_list_next = dispatch_window_list->next;
+ unsigned int num_restrict;
+ enum dispatch_group group = get_insn_group (insn);
+ enum insn_path path = get_insn_path (insn);
+ int sum;
+
+ /* Make disp_cmp and disp_jcc get scheduled at the latest. These
+ instructions should be given the lowest priority in the
+ scheduling process in Haifa scheduler to make sure they will be
+ scheduled in the same dispatch window as the reference to them. */
+ if (group == disp_jcc || group == disp_cmp)
+ return false;
+
+ /* Check nonrestricted. */
+ if (group == disp_no_group || group == disp_branch)
+ return true;
+
+ /* Get last dispatch window. */
+ if (window_list_next)
+ window_list = window_list_next;
+
+ if (window_list->window_num == 1)
+ {
+ sum = window_list->prev->window_size + window_list->window_size;
+
+ if (sum == 32
+ || (min_insn_size (insn) + sum) >= 48)
+ /* Window 1 is full. Go for next window. */
+ return true;
+ }
+
+ num_restrict = count_num_restricted (insn, window_list);
+
+ if (num_restrict > num_allowable_groups[group]) /* count_num_restricted yields 0, 1 or BIG, so in practice this rejects the BIG case. */
+ return false;
+
+ /* See if it fits in the first window. */
+ if (window_list->window_num == 0)
+ {
+ /* The first window should have only single and double path
+ uops.
+ NOTE(review): as written, a path_double insn that passes the
+ uop test still reaches the "else if" below and is rejected
+ there, so only path_single insns are accepted in window 0.
+ Confirm whether double path insns were meant to be allowed. */
+ if (path == path_double
+ && (window_list->num_uops + 2) > MAX_INSN)
+ return false;
+ else if (path != path_single)
+ return false;
+ }
+ return true;
+}
+
+/* Record INSN, which contributes NUM_UOPS micro-operations, as the
+   next entry of the dispatch window WINDOW_LIST and update the
+   window's running totals (bytes, insn and uop counts, immediates,
+   loads and stores).  The window is flagged as violated when it is
+   not flagged yet, INSN is not a compare and fits_dispatch_window
+   rejects INSN.  */
+
+static void
+add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
+{
+  int insn_bytes = min_insn_size (insn);
+  int slot_idx = window_list->num_insn;
+  enum dispatch_group group = get_insn_group (insn);
+  enum insn_path path = get_insn_path (insn);
+  sched_insn_info *slot = &window_list->window[slot_idx];
+  int imm_bytes;
+  int n_imm;
+  int n_imm32;
+  int n_imm64;
+
+  if (!window_list->violation
+      && group != disp_cmp
+      && !fits_dispatch_window (insn))
+    window_list->violation = true;
+
+  imm_bytes = get_num_immediates (insn, &n_imm, &n_imm32, &n_imm64);
+
+  /* Fill in the next free entry of the window.  */
+  slot->insn = insn;
+  slot->byte_len = insn_bytes;
+  slot->group = group;
+  slot->path = path;
+  slot->imm_bytes = imm_bytes;
+
+  /* Update the totals of the window.  */
+  window_list->window_size += insn_bytes;
+  window_list->num_insn = slot_idx + 1;
+  window_list->num_uops += num_uops;
+  window_list->imm_size += imm_bytes;
+  window_list->num_imm += n_imm;
+  window_list->num_imm_32 += n_imm32;
+  window_list->num_imm_64 += n_imm64;
+
+  switch (group)
+    {
+    case disp_store:
+      window_list->num_stores += 1;
+      break;
+
+    case disp_load:
+    case disp_prefetch:
+      /* Prefetches count as loads.  */
+      window_list->num_loads += 1;
+      break;
+
+    case disp_load_store:
+      window_list->num_stores += 1;
+      window_list->num_loads += 1;
+      break;
+
+    default:
+      break;
+    }
+}
+
+/* Account for the scheduled instruction INSN in the dispatch window
+   that is currently being filled.  A new window is allocated first
+   when the current one already holds MAX_INSN instructions, when
+   INSN's uops would push it past MAX_INSN uops, or when INSN does not
+   fit according to fits_dispatch_window.  */
+
+static void
+add_to_dispatch_window (rtx insn)
+{
+  int insn_bytes;
+  dispatch_windows *cur_window;
+  enum insn_path path;
+  enum dispatch_group group;
+  bool fits;
+  int insns_in_window;
+  int uops_in_window;
+  int which_window;
+  int uops_needed;
+  int bytes_so_far;
+
+  /* Instructions with a negative INSN_CODE are not tracked.  */
+  if (INSN_CODE (insn) < 0)
+    return;
+
+  insn_bytes = min_insn_size (insn);
+  path = get_insn_path (insn);
+  group = get_insn_group (insn);
+
+  /* Work on the last dispatch window: the one following the head of
+     the list when there is one, otherwise the head itself.  */
+  cur_window = dispatch_window_list;
+  if (cur_window->next)
+    cur_window = cur_window->next;
+
+  /* Single and double path instructions cost one and two uops; for any
+     other path the enumerator value itself is used as the count.  */
+  switch (path)
+    {
+    case path_single:
+      uops_needed = 1;
+      break;
+
+    case path_double:
+      uops_needed = 2;
+      break;
+
+    default:
+      uops_needed = (int) path;
+      break;
+    }
+
+  insns_in_window = cur_window->num_insn;
+  uops_in_window = cur_window->num_uops;
+  which_window = cur_window->window_num;
+  fits = fits_dispatch_window (insn);
+
+  /* Move on to the other window (0 <-> 1) when the current one cannot
+     take INSN.  */
+  if (insns_in_window >= MAX_INSN
+      || uops_in_window + uops_needed > MAX_INSN
+      || !fits)
+    {
+      which_window = (which_window & 1) ^ 1;
+      cur_window = allocate_next_window (which_window);
+    }
+
+  switch (which_window)
+    {
+    case 0:
+      add_insn_window (insn, cur_window, uops_needed);
+      /* A branch that fills window zero ends the window pair.  */
+      if (group == disp_branch && cur_window->num_insn >= MAX_INSN)
+	{
+	  process_end_window ();
+	  return;
+	}
+      break;
+
+    case 1:
+      /* Window one is full when the two windows together hold exactly
+	 32 bytes, or when adding INSN reaches 48 bytes or more.  In
+	 that case close the pair and start over from the head of the
+	 list.  */
+      bytes_so_far = cur_window->prev->window_size + cur_window->window_size;
+      if (bytes_so_far == 32 || insn_bytes + bytes_so_far >= 48)
+	{
+	  process_end_window ();
+	  cur_window = dispatch_window_list;
+	}
+      add_insn_window (insn, cur_window, uops_needed);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  /* An instruction that ends the basic block also ends the window.  */
+  if (is_end_basic_block (group))
+    process_end_window ();
+}
+
+/* Dump dispatch window WINDOW_NUM to FILE: first the per-window totals,
+   then one line for each occupied instruction slot.  WINDOW_NUM zero
+   selects dispatch_window_list; any other value selects
+   dispatch_window_list1.  */
+
+DEBUG_FUNCTION static void
+debug_dispatch_window_file (FILE *file, int window_num)
+{
+  dispatch_windows *win
+    = (window_num == 0) ? dispatch_window_list : dispatch_window_list1;
+  int slot;
+
+  fprintf (file, "Window #%d:\n", win->window_num);
+  fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
+	   win->num_insn, win->num_uops, win->window_size);
+  fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
+	   win->num_imm, win->num_imm_32, win->num_imm_64, win->imm_size);
+  fprintf (file, " num_loads = %d, num_stores = %d\n",
+	   win->num_loads, win->num_stores);
+  fprintf (file, " insn info:\n");
+
+  /* Stop at the first slot that holds no instruction.  */
+  for (slot = 0; slot < MAX_INSN && win->window[slot].insn; slot++)
+    {
+      sched_insn_info *info = &win->window[slot];
+
+      fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
+	       slot, group_name[info->group],
+	       slot, (void *) info->insn,
+	       slot, info->path,
+	       slot, info->byte_len,
+	       slot, info->imm_bytes);
+    }
+}
+
+/* Dump dispatch window WINDOW_NUM to stdout.  */
+
+DEBUG_FUNCTION void
+debug_dispatch_window (int window_num)
+{
+  FILE *out = stdout;
+
+  debug_dispatch_window_file (out, window_num);
+}
+
+/* Write the dispatch attributes of INSN to FILE: its dispatch group,
+   path, byte length as reported by min_insn_size, and the immediate
+   operand counts and size from get_num_immediates.  Nothing is written
+   for an instruction whose INSN_CODE is negative.  */
+
+DEBUG_FUNCTION static void
+debug_insn_dispatch_info_file (FILE *file, rtx insn)
+{
+  if (INSN_CODE (insn) >= 0)
+    {
+      int n_imm;
+      int n_imm32;
+      int n_imm64;
+      int insn_bytes = min_insn_size (insn);
+      enum insn_path path = get_insn_path (insn);
+      enum dispatch_group group = get_insn_group (insn);
+      int imm_bytes = get_num_immediates (insn, &n_imm, &n_imm32, &n_imm64);
+
+      fprintf (file, " insn info:\n");
+      fprintf (file, " group = %s, path = %d, byte_len = %d\n",
+	       group_name[group], path, insn_bytes);
+      fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
+	       n_imm, n_imm32, n_imm64, imm_bytes);
+    }
+}
+
+/* Print to stdout the number of instructions on the ready list,
+   followed by the dispatch information of each of them.  */
+
+DEBUG_FUNCTION void
+debug_ready_dispatch (void)
+{
+  int n_ready = number_in_ready ();
+  int idx = 0;
+
+  fprintf (stdout, "Number of ready: %d\n", n_ready);
+
+  while (idx < n_ready)
+    {
+      debug_insn_dispatch_info_file (stdout, get_ready_element (idx));
+      idx++;
+    }
+}
+
+/* Driver of the dispatch scheduler.  MODE selects the operation:
+   DISPATCH_INIT runs init_dispatch_sched, ADD_TO_DISPATCH_WINDOW adds
+   INSN to the dispatch window, and any other MODE does nothing.  */
+
+static void
+do_dispatch (rtx insn, int mode)
+{
+  if (mode == DISPATCH_INIT)
+    {
+      init_dispatch_sched ();
+      return;
+    }
+
+  if (mode == ADD_TO_DISPATCH_WINDOW)
+    add_to_dispatch_window (insn);
+}
+
+/* Answer the dispatch scheduling query ACTION; IS_CMP and
+   FITS_DISPATCH_WINDOW are asked about INSN.  Every query is answered
+   false unless tuning for PROCESSOR_BDVER1 with flag_dispatch_scheduler
+   set, and unknown actions are answered false as well.  */
+
+static bool
+has_dispatch (rtx insn, int action)
+{
+  /* Dispatch scheduling is off: nothing to ask.  */
+  if (ix86_tune != PROCESSOR_BDVER1 || !flag_dispatch_scheduler)
+    return false;
+
+  switch (action)
+    {
+    case IS_DISPATCH_ON:
+      return true;
+
+    case IS_CMP:
+      return is_cmp (insn);
+
+    case DISPATCH_VIOLATION:
+      return dispatch_violation ();
+
+    case FITS_DISPATCH_WINDOW:
+      return fits_dispatch_window (insn);
+
+    default:
+      return false;
+    }
+}
+
+/* Return the number of units in a SIMD word for MODE: 16 when
+   TARGET_SSE, otherwise UNITS_PER_WORD.
+
+   ??? No autovectorization into MMX or 3DNOW until we can reliably
+   place emms and femms instructions.  */
+
+unsigned int
+ix86_units_per_simd_word (enum machine_mode mode)
+{
+  /* Fall back to the word size for DFmode when the double precision
+     vectorizer is disabled.  */
+  if (!TARGET_VECTORIZE_DOUBLE && mode == DFmode)
+    return UNITS_PER_WORD;
+
+#if 0
+  /* FIXME: AVX has 32byte floating point vector operations and 16byte
+     integer vector operations.  But vectorizer doesn't support
+     different sizes for integer and floating point vectors.  We limit
+     vector size to 16byte.  */
+  if (TARGET_AVX)
+    return (mode == DFmode || mode == SFmode) ? 32 : 16;
+#endif
+
+  if (TARGET_SSE)
+    return 16;
+
+  return UNITS_PER_WORD;
+}
+
/* Initialize the GCC target structure.  The TARGET_* hook macros below
   are redefined (after an #undef where one is shown) to name the
   implementation this file supplies for each hook.
   NOTE(review): the initializer that consumes these macros is not
   visible in this part of the file -- confirm where it is expanded.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif
+#undef TARGET_PROFILE_BEFORE_PROLOGUE
+#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
+
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND ix86_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
+
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
+#undef TARGET_ENUM_VA_LIST_P
+#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
+
#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
+#undef TARGET_RETURN_POPS_ARGS
+#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
+#undef TARGET_CLASS_LIKELY_SPILLED_P
+#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
+
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
ix86_builtin_vectorization_cost