/* Sentinel value stored in a reference's prefetch_before field: all bits
   set in an unsigned HOST_WIDE_INT.  Assigned when no finite
   "prefetch only before iteration N" bound applies (see the uses where
   prefetch_before is reset to this value).  */
#define PREFETCH_ALL (~(unsigned HOST_WIDE_INT) 0)
+/* Do not generate a prefetch if the unroll factor is significantly less
+ than what is required by the prefetch. This is to avoid redundant
+ prefetches. For example, if prefetch_mod is 16 and unroll_factor is
+ 1, this means prefetching requires unrolling the loop 16 times, but
+ the loop is not going to be unrolled. In this case (ratio = 16),
+ prefetching is not likely to be beneficial. */
+
+#ifndef PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO
+#define PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO 8
+#endif
+
/* The memory reference. */
struct mem_ref
hit_from = ddown (delta_b, PREFETCH_BLOCK) * PREFETCH_BLOCK;
prefetch_before = (hit_from - delta_r + step - 1) / step;
+ /* Do not keep a finite prefetch_before bound if the prefetch distance
+    would reach beyond the L2 cache size; use PREFETCH_ALL instead.  */
+ if (prefetch_before > (unsigned) abs (L2_CACHE_SIZE_BYTES / step))
+ prefetch_before = PREFETCH_ALL;
if (prefetch_before < ref->prefetch_before)
ref->prefetch_before = prefetch_before;
reduced_prefetch_block, align_unit);
if (miss_rate <= ACCEPTABLE_MISS_RATE)
{
+ /* Do not keep a finite prefetch_before bound if the prefetch distance
+    would reach beyond the L2 cache size; use PREFETCH_ALL instead.  */
+ if (prefetch_before > L2_CACHE_SIZE_BYTES / PREFETCH_BLOCK)
+ prefetch_before = PREFETCH_ALL;
if (prefetch_before < ref->prefetch_before)
ref->prefetch_before = prefetch_before;
if (!should_issue_prefetch_p (ref))
continue;
+ /* The loop is far from being sufficiently unrolled for this
+    prefetch.  Do not generate the prefetch, to avoid emitting many
+    redundant prefetches.  */
+ if (ref->prefetch_mod / unroll_factor > PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO)
+ continue;
+
/* If we need to prefetch the reference each PREFETCH_MOD iterations,
and we unroll the loop UNROLL_FACTOR times, we need to insert
ceil (UNROLL_FACTOR / PREFETCH_MOD) instructions in each