static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
-static unsigned int arm_units_per_simd_word (enum machine_mode);
+static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
+static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
+static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
+                                                     const_tree type,
+                                                     int misalignment,
+                                                     bool is_packed);
\f
/* Table of machine attributes. */
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
-#undef TARGET_VECTORIZE_UNITS_PER_SIMD_WORD
-#define TARGET_VECTORIZE_UNITS_PER_SIMD_WORD arm_units_per_simd_word
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
+#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
+#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
+  arm_vector_alignment_reachable
+
+#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
+#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
+  arm_builtin_support_vector_misalignment
+
struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Obstack for minipool constant handling. */
      flag_reorder_blocks = 1;
    }
-  if (!PARAM_SET_P (PARAM_GCSE_UNRESTRICTED_COST)
-      && flag_pic)
+  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
-    set_param_value ("gcse-unrestricted-cost", 2);
+    maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2);
  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers, and pre-decrement when
     TYPE is 0.  */
-  if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
+  if ((type != 1 && GET_CODE (ind) == POST_INC)
+      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* FIXME: vld1 allows register post-modify.  */
      {
        rtx addr;
        bool postinc = FALSE;
+       unsigned align, modesize, align_bits;
+
        gcc_assert (GET_CODE (x) == MEM);
        addr = XEXP (x, 0);
        if (GET_CODE (addr) == POST_INC)
          {
            postinc = 1;
            addr = XEXP (addr, 0);
          }
-       asm_fprintf (stream, "[%r]", REGNO (addr));
+       asm_fprintf (stream, "[%r", REGNO (addr));
+
+       /* We know the alignment of this access, so we can emit a hint in the
+          instruction (for some alignments) as an aid to the memory subsystem
+          of the target.  */
+       align = MEM_ALIGN (x) >> 3;  /* MEM_ALIGN is in bits; convert to bytes.  */
+       modesize = GET_MODE_SIZE (GET_MODE (x));
+
+       /* Only certain alignment specifiers are supported by the hardware.  */
+       if (modesize == 16 && (align % 32) == 0)
+         align_bits = 256;
+       else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
+         align_bits = 128;
+       else if ((align % 8) == 0)
+         align_bits = 64;
+       else
+         align_bits = 0;
+
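+       /* For example, a quad-word (16-byte) access known to be 32-byte
+          aligned is printed as "[r0:256]", while an access with no usable
+          alignment guarantee keeps the plain "[r0]" form ("r0" here stands
+          in for whatever base register the address happens to use).  */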
+       if (align_bits != 0)
+         asm_fprintf (stream, ":%d", align_bits);
+
+       asm_fprintf (stream, "]");
+
        if (postinc)
          fputs ("!", stream);
      }
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */
-static unsigned int
-arm_units_per_simd_word (enum machine_mode mode ATTRIBUTE_UNUSED)
+static enum machine_mode
+arm_preferred_simd_mode (enum machine_mode mode)
{
-  return (TARGET_NEON
-          ? (TARGET_NEON_VECTORIZE_QUAD ? 16 : 8) : UNITS_PER_WORD);
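+  /* Prefer 128-bit (quad-word) vector modes when -mvectorize-with-neon-quad
+     is in effect; otherwise use the 64-bit (double-word) forms.  */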
+  if (TARGET_NEON)
+    switch (mode)
+      {
+      case SFmode:
+        return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
+      case SImode:
+        return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
+      case HImode:
+        return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
+      case QImode:
+        return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
+      case DImode:
+        if (TARGET_NEON_VECTORIZE_QUAD)
+          return V2DImode;
+        break;
+
+      default:;
+      }
+
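+  /* iWMMXt has 64-bit vector registers only.  */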
+  if (TARGET_REALLY_IWMMXT)
+    switch (mode)
+      {
+      case SImode:
+        return V2SImode;
+      case HImode:
+        return V4HImode;
+      case QImode:
+        return V8QImode;
+
+      default:;
+      }
+
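+  /* No suitable vector mode exists for this element mode; tell the
+     vectorizer to fall back to the scalar word mode.  */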
+  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
}
}
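+
+/* Implement TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */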
+static bool
+arm_vector_alignment_reachable (const_tree type, bool is_packed)
+{
+  /* Vectors which aren't in packed structures will not be less aligned than
+     the natural alignment of their element type, so this is safe.  */
+  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
+    return !is_packed;
+
+  return default_builtin_vector_alignment_reachable (type, is_packed);
+}
+
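+/* Implement TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT.  */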
+static bool
+arm_builtin_support_vector_misalignment (enum machine_mode mode,
+                                         const_tree type, int misalignment,
+                                         bool is_packed)
+{
+  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
+    {
+      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
+
+      if (is_packed)
+        return align == 1;
+
+      /* If the misalignment is unknown, we should be able to handle the access
+         so long as it is not to a member of a packed data structure.  */
+      if (misalignment == -1)
+        return true;
+
+      /* Return true if the misalignment is a multiple of the natural alignment
+         of the vector's element type.  This is probably always going to be
+         true in practice, since we've already established that this isn't a
+         packed access.  */
+      return ((misalignment % align) == 0);
+    }
+
+  return default_builtin_support_vector_misalignment (mode, type, misalignment,
+                                                      is_packed);
+}
+
#include "gt-arm.h"