emit_insn (gen_slt_sf (dest, fp2, fp1));
}
\f
-#define MAX_MOVE_REGS 4
-#define MAX_MOVE_BYTES (MAX_MOVE_REGS * UNITS_PER_WORD)
-
/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
Assume that the areas do not overlap. */
set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
}
-/* Move LENGTH bytes from SRC to DEST using a loop that moves MAX_MOVE_BYTES
- per iteration. LENGTH must be at least MAX_MOVE_BYTES. Assume that the
- memory regions do not overlap. */
+/* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
+ bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
+ the memory regions do not overlap. */
static void
-mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length)
+mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
+ HOST_WIDE_INT bytes_per_iter)
{
rtx label, src_reg, dest_reg, final_src;
HOST_WIDE_INT leftover;
- leftover = length % MAX_MOVE_BYTES;
+ leftover = length % bytes_per_iter;
length -= leftover;
/* Create registers and memory references for use within the loop. */
- mips_adjust_block_mem (src, MAX_MOVE_BYTES, &src_reg, &src);
- mips_adjust_block_mem (dest, MAX_MOVE_BYTES, &dest_reg, &dest);
+ mips_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
+ mips_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
/* Calculate the value that SRC_REG should have after the last iteration
of the loop. */
emit_label (label);
/* Emit the loop body. */
- mips_block_move_straight (dest, src, MAX_MOVE_BYTES);
+ mips_block_move_straight (dest, src, bytes_per_iter);
/* Move on to the next block. */
- mips_emit_move (src_reg, plus_constant (src_reg, MAX_MOVE_BYTES));
- mips_emit_move (dest_reg, plus_constant (dest_reg, MAX_MOVE_BYTES));
+ mips_emit_move (src_reg, plus_constant (src_reg, bytes_per_iter));
+ mips_emit_move (dest_reg, plus_constant (dest_reg, bytes_per_iter));
/* Emit the loop condition. */
if (Pmode == DImode)
{
if (GET_CODE (length) == CONST_INT)
{
- if (INTVAL (length) <= 2 * MAX_MOVE_BYTES)
+ if (INTVAL (length) <= MIPS_MAX_MOVE_BYTES_STRAIGHT)
{
mips_block_move_straight (dest, src, INTVAL (length));
return true;
}
else if (optimize)
{
- mips_block_move_loop (dest, src, INTVAL (length));
+ mips_block_move_loop (dest, src, INTVAL (length),
+ MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER);
return true;
}
}
#define DEFAULT_SIGNED_CHAR 1
#endif
-/* Max number of bytes we can move from memory to memory
- in one reasonably fast instruction. */
-#define MOVE_MAX (TARGET_64BIT ? 8 : 4)
+/* Although LDC1 and SDC1 provide 64-bit moves on 32-bit targets,
+ we generally don't want to use them for copying arbitrary data.
+ A single N-word move is usually the same cost as N single-word moves. */
+#define MOVE_MAX UNITS_PER_WORD
#define MAX_MOVE_MAX 8
/* Define this macro as a C expression which is nonzero if
#undef PTRDIFF_TYPE
#define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int")
+/* The maximum number of bytes that can be copied by one iteration of
+ a movmemsi loop; see mips_block_move_loop. */
+#define MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER \
+ (UNITS_PER_WORD * 4)
+
+/* The maximum number of bytes that can be copied by a straight-line
+ implementation of movmemsi; see mips_block_move_straight. We want
+ to make sure that any loop-based implementation will iterate at
+ least twice. */
+#define MIPS_MAX_MOVE_BYTES_STRAIGHT \
+ (MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER * 2)
+
/* The base cost of a memcpy call, for MOVE_RATIO and friends. These
values were determined experimentally by benchmarking with CSiBE.
In theory, the call overhead is higher for TARGET_ABICALLS (especially
#define MIPS_CALL_RATIO 8
-/* Define MOVE_RATIO to encourage use of movmemsi when enabled,
- since it should always generate code at least as good as
- move_by_pieces(). But when inline movmemsi pattern is disabled
- (i.e., with -mips16 or -mmemcpy), instead use a value approximating
- the length of a memcpy call sequence, so that move_by_pieces will
- generate inline code if it is shorter than a function call.
- Since move_by_pieces_ninsns() counts memory-to-memory moves, but
- we'll have to generate a load/store pair for each, halve the value of
- MIPS_CALL_RATIO to take that into account.
- The default value for MOVE_RATIO when HAVE_movmemsi is true is 2.
- There is no point to setting it to less than this to try to disable
- move_by_pieces entirely, because that also disables some desirable
- tree-level optimizations, specifically related to optimizing a
- one-byte string copy into a simple move byte operation. */
-
-#define MOVE_RATIO \
- ((TARGET_MIPS16 || TARGET_MEMCPY) ? MIPS_CALL_RATIO / 2 : 2)
+/* Any loop-based implementation of movmemsi will have at least
+ MIPS_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD memory-to-memory
+ moves, so allow individual copies of fewer elements.
+
+ When movmemsi is not available, use a value approximating
+ the length of a memcpy call sequence, so that move_by_pieces
+ will generate inline code if it is shorter than a function call.
+ Since move_by_pieces_ninsns counts memory-to-memory moves, but
+ we'll have to generate a load/store pair for each, halve the
+ value of MIPS_CALL_RATIO to take that into account. */
+
+#define MOVE_RATIO \
+ (HAVE_movmemsi \
+ ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \
+ : MIPS_CALL_RATIO / 2)
+
+/* movmemsi is meant to generate code that is at least as good as
+ move_by_pieces. However, movmemsi effectively uses a by-pieces
+ implementation both for moves smaller than a word and for word-aligned
+ moves of no more than MIPS_MAX_MOVE_BYTES_STRAIGHT bytes. We should
+   allow the tree-level optimizers to do such moves by pieces, as it
+ often exposes other optimization opportunities. We might as well
+ continue to use movmemsi at the rtl level though, as it produces
+ better code when scheduling is disabled (such as at -O). */
+
+#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
+ (HAVE_movmemsi \
+ ? (!currently_expanding_to_rtl \
+ && ((ALIGN) < BITS_PER_WORD \
+ ? (SIZE) < UNITS_PER_WORD \
+ : (SIZE) <= MIPS_MAX_MOVE_BYTES_STRAIGHT)) \
+ : (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
+ < (unsigned int) MOVE_RATIO))
/* For CLEAR_RATIO, when optimizing for size, give a better estimate
of the length of a memset call, but use the default otherwise. */