#include "expr.h"
#include "recog.h"
#include "dse.h"
+#include "optabs.h"
#include "dbgcnt.h"
/* This file contains three techniques for performing Dead Store
contains a wild read, the use_rec will be null. */
bool wild_read;
- /* This field is set for const function calls. Const functions
- cannot read memory, but they can read the stack because that is
- where they may get their parms. So having this set is less
- severe than a wild read, it just means that all of the stores to
- the stack are killed rather than all stores. */
- bool stack_read;
+ /* This field is only used for the processing of const functions.
+ These functions cannot read memory, but they can read the stack
+ because that is where they may get their parms. We need to be
+ this conservative because, like the store motion pass, we don't
+ consider CALL_INSN_FUNCTION_USAGE when processing call insns.
+ Moreover, we need to distinguish two cases:
+ 1. Before reload (register elimination), the stores related to
+ outgoing arguments are stack pointer based and thus deemed
+ of non-constant base in this pass. This requires special
+ handling but also means that the frame pointer based stores
+ need not be killed upon encountering a const function call.
+ 2. After reload, the stores related to outgoing arguments can be
+ either stack pointer or hard frame pointer based. This means
+ that we have no other choice than also killing all the frame
+ pointer based stores upon encountering a const function call.
+ This field is set after reload for const function calls. Having
+ this set is less severe than a wild read; it just means that all
+ the frame related stores are killed rather than all the stores. */
+ bool frame_read;
+
+ /* This field is only used for the processing of const functions.
+ It is set if the insn may contain a stack pointer based store. */
+ bool stack_pointer_based;
/* This is true if any of the sets within the store contains a
cselib base. Such stores can only be deleted by the local
}
-/* Return true if X is a constant or one of the registers that behaves
- as a constant over the life of a function. */
+/* Return true if X is a constant or one of the registers that behave
+ as a constant over the life of a function. This is equivalent to
+ !rtx_varies_p for memory addresses. */
static bool
const_or_frame_p (rtx x)
}
else
{
- store_info = pool_alloc (cse_store_info_pool);
+ rtx base_term = find_base_term (XEXP (mem, 0));
+ if (!base_term
+ || (GET_CODE (base_term) == ADDRESS
+ && GET_MODE (base_term) == Pmode
+ && XEXP (base_term, 0) == stack_pointer_rtx))
+ insn_info->stack_pointer_based = true;
insn_info->contains_cselib_groups = true;
+
+ store_info = pool_alloc (cse_store_info_pool);
group_id = -1;
if (dump_file)
}
+/* If the modes are different and the value's source and target do not
+ line up, we need to extract the value from the lower part of the rhs of
+ the store, shift it, and then put it into a form that can be shoved
+ into the read_insn. This function generates a right SHIFT of a
+ value that is at least ACCESS_SIZE bytes wide of READ_MODE. The
+ shift sequence is returned or NULL if we failed to find a
+ shift. */
+
+static rtx
+find_shift_sequence (rtx read_reg,
+ int access_size,
+ store_info_t store_info,
+ read_info_t read_info,
+ int shift)
+{
+ enum machine_mode store_mode = GET_MODE (store_info->mem);
+ enum machine_mode read_mode = GET_MODE (read_info->mem);
+ rtx chosen_seq = NULL;
+
+ /* Some machines like the x86 have shift insns for each size of
+ operand. Other machines like the ppc or the ia-64 may only have
+ shift insns that shift values within 32 or 64 bit registers.
+ This loop tries to find the smallest shift insn that will right
+ justify the value we want to read but is available in one insn on
+ the machine. */
+
+ for (; access_size <= UNITS_PER_WORD; access_size *= 2)
+ {
+ rtx target, new_reg, shift_seq, insn;
+ enum machine_mode new_mode;
+ int cost;
+
+ /* Try a wider mode if truncating the store mode to ACCESS_SIZE
+ bytes requires a real instruction. */
+ if (access_size < GET_MODE_SIZE (store_mode)
+ && !TRULY_NOOP_TRUNCATION (access_size * BITS_PER_UNIT,
+ GET_MODE_BITSIZE (store_mode)))
+ continue;
+
+ new_mode = smallest_mode_for_size (access_size * BITS_PER_UNIT,
+ MODE_INT);
+ new_reg = gen_reg_rtx (new_mode);
+
+ start_sequence ();
+
+ /* In theory we could also check for an ashr. Ian Taylor knows
+ of one dsp where the cost of these two was not the same. But
+ this really is a rare case anyway. */
+ target = expand_binop (new_mode, lshr_optab, new_reg,
+ GEN_INT (shift), new_reg, 1, OPTAB_DIRECT);
+
+ shift_seq = get_insns ();
+ end_sequence ();
+
+ if (target != new_reg || shift_seq == NULL)
+ continue;
+
+ cost = 0;
+ for (insn = shift_seq; insn != NULL_RTX; insn = NEXT_INSN (insn))
+ if (INSN_P (insn))
+ cost += insn_rtx_cost (PATTERN (insn));
+
+ /* The computation up to here is essentially independent
+ of the arguments and could be precomputed. It may
+ not be worth doing so. We could precompute if
+ worthwhile or at least cache the results. The result
+ technically depends on both SHIFT and ACCESS_SIZE,
+ but in practice the answer will depend only on ACCESS_SIZE. */
+
+ if (cost > COSTS_N_INSNS (1))
+ continue;
+
+ /* We found an acceptable shift. Generate a move to
+ take the value from the store and put it into the
+ shift pseudo, then shift it, then generate another
+ move to put it into the target of the read. */
+ start_sequence ();
+ emit_move_insn (new_reg, gen_lowpart (new_mode, store_info->rhs));
+ emit_insn (shift_seq);
+ convert_move (read_reg, new_reg, 1);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, " -- adding extract insn r%d:%s = r%d:%s\n",
+ REGNO (new_reg), GET_MODE_NAME (new_mode),
+ REGNO (store_info->rhs), GET_MODE_NAME (store_mode));
+
+ fprintf (dump_file, " -- with shift of r%d by %d\n",
+ REGNO(new_reg), shift);
+ fprintf (dump_file, " -- and second extract insn r%d:%s = r%d:%s\n",
+ REGNO (read_reg), GET_MODE_NAME (read_mode),
+ REGNO (new_reg), GET_MODE_NAME (new_mode));
+ }
+
+ /* Get the three insn sequence and return it. */
+ chosen_seq = get_insns ();
+ end_sequence ();
+ break;
+ }
+
+ return chosen_seq;
+}
+
+
/* Take a sequence of:
A <- r1
...
...
... <- r2
- The STORE_INFO and STORE_INFO are for the store and the READ_INFO
+ or
+
+ r3 <- extract (r1)
+ r3 <- r3 >> shift
+ r2 <- extract (r3)
+ ... <- r2
+
+ or
+
+ r2 <- extract (r1)
+ ... <- r2
+
+ Depending on the alignment and the mode of the store and
+ subsequent load.
+
+
+ The STORE_INFO and STORE_INSN are for the store and READ_INFO
and READ_INSN are for the read. Return true if the replacement
went ok. */
replace_read (store_info_t store_info, insn_info_t store_insn,
read_info_t read_info, insn_info_t read_insn, rtx *loc)
{
+ enum machine_mode store_mode = GET_MODE (store_info->mem);
+ enum machine_mode read_mode = GET_MODE (read_info->mem);
+ int shift;
+ int access_size; /* In bytes. */
+ rtx read_reg = gen_reg_rtx (read_mode);
+ rtx shift_seq = NULL;
+
if (!dbg_cnt (dse))
return false;
+ if (GET_MODE_CLASS (read_mode) != MODE_INT
+ || GET_MODE_CLASS (store_mode) != MODE_INT)
+ return false;
+
+ /* To get here the read is within the boundaries of the write so
+ shift will never be negative. Start out with the shift being in
+ bytes. */
+ if (BYTES_BIG_ENDIAN)
+ shift = store_info->end - read_info->end;
+ else
+ shift = read_info->begin - store_info->begin;
+
+ access_size = shift + GET_MODE_SIZE (read_mode);
+
+ /* From now on it is bits. */
+ shift *= BITS_PER_UNIT;
+
+ /* We need to keep this in perspective. We are replacing a read
+ with a sequence of insns, but the read will almost certainly be
+ in cache, so it is not going to be an expensive one. Thus, we
+ are not willing to do a multi insn shift or worse a subroutine
+ call to get rid of the read. */
+ if (shift)
+ {
+ if (access_size > UNITS_PER_WORD)
+ return false;
+
+ shift_seq = find_shift_sequence (read_reg, access_size, store_info,
+ read_info, shift);
+ if (!shift_seq)
+ return false;
+ }
+
if (dump_file)
- fprintf (dump_file, "generating move to replace load at %d from store at %d\n",
+ fprintf (dump_file, "replacing load at %d from store at %d\n",
INSN_UID (read_insn->insn), INSN_UID (store_insn->insn));
- if (GET_MODE (store_info->mem) == GET_MODE (read_info->mem))
+
+ if (validate_change (read_insn->insn, loc, read_reg, 0))
{
- rtx new_reg = gen_reg_rtx (GET_MODE (store_info->mem));
- if (validate_change (read_insn->insn, loc, new_reg, 0))
+ rtx insns;
+ deferred_change_t deferred_change = pool_alloc (deferred_change_pool);
+
+ if (read_mode == store_mode)
{
- rtx insns;
- deferred_change_t deferred_change = pool_alloc (deferred_change_pool);
-
start_sequence ();
- emit_move_insn (new_reg, store_info->rhs);
+
+ /* The modes are the same and everything lines up. Just
+ generate a simple move. */
+ emit_move_insn (read_reg, store_info->rhs);
+ if (dump_file)
+ fprintf (dump_file, " -- adding move insn r%d = r%d\n",
+ REGNO (read_reg), REGNO (store_info->rhs));
insns = get_insns ();
end_sequence ();
- emit_insn_before (insns, store_insn->insn);
-
- if (dump_file)
- fprintf (dump_file, " -- adding move insn %d: r%d = r%d\n",
- INSN_UID (insns), REGNO (new_reg), REGNO (store_info->rhs));
-
- /* And now for the cludge part: cselib croaks if you just
- return at this point. There are two reasons for this:
-
- 1) Cselib has an idea of how many pseudos there are and
- that does not include the new one we just added.
-
- 2) Cselib does not know about the move insn we added
- above the store_info, and there is no way to tell it
- about it, because it has "moved on".
-
- So we are just going to have to lie. The move insn is
- not really an issue, cselib did not see it. But the use
- of the new pseudo read_insn is a real problem. The way
- that we solve this problem is that we are just going to
- put the mem back keep a table of mems to get rid of. At
- the end of the basic block we can put it back. */
-
- *loc = read_info->mem;
- deferred_change->next = deferred_change_list;
- deferred_change_list = deferred_change;
- deferred_change->loc = loc;
- deferred_change->reg = new_reg;
-
- /* Get rid of the read_info, from the point of view of the
- rest of dse, play like this read never happened. */
- read_insn->read_rec = read_info->next;
- pool_free (read_info_pool, read_info);
- return true;
}
- else
+ else if (shift)
+ insns = shift_seq;
+ else
{
+ /* The modes are different but the lsbs are in the same
+ place; we need to extract the value in the right mode from
+ the rhs of the store. */
+ start_sequence ();
+ convert_move (read_reg, store_info->rhs, 1);
+
if (dump_file)
- fprintf (dump_file, " -- validation failure\n");
- return false;
+ fprintf (dump_file, " -- adding extract insn r%d:%s = r%d:%s\n",
+ REGNO (read_reg), GET_MODE_NAME (read_mode),
+ REGNO (store_info->rhs), GET_MODE_NAME (store_mode));
+ insns = get_insns ();
+ end_sequence ();
}
+
+ /* Insert this right before the store insn where it will be safe
+ from later insns that might change it before the read. */
+ emit_insn_before (insns, store_insn->insn);
+
+ /* And now for the kludge part: cselib croaks if you just
+ return at this point. There are two reasons for this:
+
+ 1) Cselib has an idea of how many pseudos there are and
+ that does not include the new ones we just added.
+
+ 2) Cselib does not know about the move insn we added
+ above the store_info, and there is no way to tell it
+ about it, because it has "moved on".
+
+ Problem (1) is fixable with a certain amount of engineering.
+ Problem (2) requires starting the bb from scratch. This
+ could be expensive.
+
+ So we are just going to have to lie. The move/extraction
+ insns are not really an issue; cselib did not see them. But
+ the use of the new pseudo in read_insn is a real problem because
+ cselib has not scanned this insn. The way that we solve this
+ problem is that we are just going to put the mem back for now
+ and when we are finished with the block, we undo this. We
+ keep a table of mems to get rid of. At the end of the basic
+ block we can put them back. */
+
+ *loc = read_info->mem;
+ deferred_change->next = deferred_change_list;
+ deferred_change_list = deferred_change;
+ deferred_change->loc = loc;
+ deferred_change->reg = read_reg;
+
+ /* Get rid of the read_info, from the point of view of the
+ rest of dse, play like this read never happened. */
+ read_insn->read_rec = read_info->next;
+ pool_free (read_info_pool, read_info);
+ return true;
}
- else
+ else
{
- /* Someone with excellent rtl skills needs to fill this in. You
- are guaranteed that the read is of the same size or smaller
- than the store, and that the read does not hang off one of
- the ends of the store. But the offsets of each must be
- checked because the read does not have to line up on either
- end of the store so the begin fields need to be examined in
- both the store_info and read_info. */
if (dump_file)
- fprintf (dump_file, " -- complex load, currently unsupported.\n");
+ fprintf (dump_file, " -- validation failure\n");
return false;
}
}
-
/* A for_each_rtx callback in which DATA is the bb_info. Check to see
if LOC is a mem and if it is look at the address and kill any
appropriate stores that may be active. */
if (CALL_P (insn))
{
insn_info->cannot_delete = true;
+
/* Const functions cannot do anything bad i.e. read memory,
- however, they can read their parameters which may have been
- pushed onto the stack. */
+ however, they can read their parameters which may have
+ been pushed onto the stack. */
if (CONST_OR_PURE_CALL_P (insn) && !pure_call_p (insn))
{
insn_info_t i_ptr = active_local_stores;
if (dump_file)
fprintf (dump_file, "const call %d\n", INSN_UID (insn));
+ /* See the head comment of the frame_read field. */
+ if (reload_completed)
+ insn_info->frame_read = true;
+
+ /* Loop over the active stores and remove those which are
+ killed by the const function call. */
while (i_ptr)
{
- store_info_t store_info = i_ptr->store_rec;
+ bool remove_store = false;
- /* Skip the clobbers. */
- while (!store_info->is_set)
- store_info = store_info->next;
+ /* The stack pointer based stores are always killed. */
+ if (i_ptr->stack_pointer_based)
+ remove_store = true;
+
+ /* If the frame is read, the frame related stores are killed. */
+ else if (insn_info->frame_read)
+ {
+ store_info_t store_info = i_ptr->store_rec;
+
+ /* Skip the clobbers. */
+ while (!store_info->is_set)
+ store_info = store_info->next;
- /* Remove the frame related stores. */
- if (store_info->group_id >= 0
- && VEC_index (group_info_t, rtx_group_vec, store_info->group_id)->frame_related)
+ if (store_info->group_id >= 0
+ && VEC_index (group_info_t, rtx_group_vec,
+ store_info->group_id)->frame_related)
+ remove_store = true;
+ }
+
+ if (remove_store)
{
if (dump_file)
dump_insn_info ("removing from active", i_ptr);
}
else
last = i_ptr;
+
i_ptr = i_ptr->next_local_store;
}
-
- insn_info->stack_read = true;
-
- return;
}
- /* Every other call, including pure functions may read memory. */
- add_wild_read (bb_info);
+ else
+ /* Every other call, including pure functions, may read memory. */
+ add_wild_read (bb_info);
+
return;
}
/* Assuming that there are sets in these insns, we cannot delete
them. */
if ((GET_CODE (PATTERN (insn)) == CLOBBER)
- || volatile_insn_p (PATTERN (insn))
+ || volatile_refs_p (PATTERN (insn))
|| (flag_non_call_exceptions && may_trap_p (PATTERN (insn)))
|| (RTX_FRAME_RELATED_P (insn))
|| find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX))
int i;
group_info_t group;
- /* For const function calls kill the stack related stores. */
- if (insn_info->stack_read)
+ /* If this insn reads the frame, kill all the frame related stores. */
+ if (insn_info->frame_read)
{
for (i = 0; VEC_iterate (group_info_t, rtx_group_vec, i, group); i++)
if (group->process_globally && group->frame_related)
TODO_ggc_collect, /* todo_flags_finish */
'w' /* letter */
};
-