+/* Returns the total volume of the memory references REFS, taking into account
+ reuses in the innermost loop and cache line size. TODO -- we should also
+ take into account reuses across the iterations of the loops in the loop
+ nest. */
+
+static unsigned
+volume_of_references (struct mem_ref_group *refs)
+{
+ /* Running total; approximates the number of bytes of cache touched in one
+ iteration of the innermost loop (each term below is a fraction of a
+ cache line). */
+ unsigned volume = 0;
+ struct mem_ref_group *gr;
+ struct mem_ref *ref;
+
+ /* Walk every reference in every group of REFS. */
+ for (gr = refs; gr; gr = gr->next)
+ for (ref = gr->refs; ref; ref = ref->next)
+ {
+ /* Almost always reuses another value? */
+ if (ref->prefetch_before != PREFETCH_ALL)
+ continue;
+
+ /* If several iterations access the same cache line, use the size of
+ the line divided by this number. Otherwise, a cache line is
+ accessed in each iteration. TODO -- in the latter case, we should
+ take the size of the reference into account, rounding it up on cache
+ line size multiple. */
+ volume += L1_CACHE_LINE_SIZE / ref->prefetch_mod;
+ }
+ return volume;
+}
+
+/* Returns the volume of memory references accessed across VEC iterations of
+ loops, whose sizes are described in the LOOP_SIZES array. N is the number
+ of the loops in the nest (length of VEC and LOOP_SIZES vectors). */
+
+static unsigned
+volume_of_dist_vector (lambda_vector vec, unsigned *loop_sizes, unsigned n)
+{
+ unsigned i;
+
+ /* Find the outermost loop with a nonzero dependence distance; components
+ are ordered from outermost (index 0) to innermost (index N - 1). */
+ for (i = 0; i < n; i++)
+ if (vec[i] != 0)
+ break;
+
+ /* All-zero distance vector: the accesses occur in the same iteration,
+ so no volume separates them. */
+ if (i == n)
+ return 0;
+
+ /* Distance vectors are normalized to be lexicographically positive, so
+ the leading nonzero component must be positive. */
+ gcc_assert (vec[i] > 0);
+
+ /* We ignore the parts of the distance vector in subloops, since usually
+ the numbers of iterations are much smaller. */
+ return loop_sizes[i] * vec[i];
+}
+
+/* Add the steps of ACCESS_FN multiplied by STRIDE to the array STRIDES
+ at the position corresponding to the loop of the step. N is the depth
+ of the considered loop nest, and, LOOP is its innermost loop. */
+
+static void
+add_subscript_strides (tree access_fn, unsigned stride,
+ HOST_WIDE_INT *strides, unsigned n, struct loop *loop)
+{
+ struct loop *aloop;
+ tree step;
+ HOST_WIDE_INT astep;
+ /* Depth of the outermost loop of the considered nest; loops at or above
+ this depth are outside the nest and are ignored below. */
+ unsigned min_depth = loop_depth (loop) - n;
+
+ /* Peel the chrec one level per iteration; each POLYNOMIAL_CHREC level
+ carries the step of the subscript in one loop of the nest. */
+ while (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
+ {
+ aloop = get_chrec_loop (access_fn);
+ step = CHREC_RIGHT (access_fn);
+ access_fn = CHREC_LEFT (access_fn);
+
+ /* Skip steps that belong to loops outside the considered nest. */
+ if ((unsigned) loop_depth (aloop) <= min_depth)
+ continue;
+
+ if (host_integerp (step, 0))
+ astep = tree_low_cst (step, 0);
+ else
+ /* Non-constant step: assume it is large enough to leave the cache
+ line on each iteration. */
+ astep = L1_CACHE_LINE_SIZE;
+
+ /* Index N - 1 corresponds to LOOP (the innermost loop), smaller
+ indices to enclosing loops. */
+ strides[n - 1 - loop_depth (loop) + loop_depth (aloop)] += astep * stride;
+
+ }
+}
+
+/* Returns the volume of memory references accessed between two consecutive
+ self-reuses of the reference DR. We consider the subscripts of DR in N
+ loops, and LOOP_SIZES contains the volumes of accesses in each of the
+ loops. LOOP is the innermost loop of the current loop nest. */
+
+static unsigned
+self_reuse_distance (data_reference_p dr, unsigned *loop_sizes, unsigned n,
+ struct loop *loop)
+{
+ tree stride, access_fn;
+ HOST_WIDE_INT *strides, astride;
+ VEC (tree, heap) *access_fns;
+ tree ref = DR_REF (dr);
+ /* ~0u: "no self-reuse found", i.e. effectively infinite reuse distance
+ unless the scan below finds a reusing loop. */
+ unsigned i, ret = ~0u;
+
+ /* In the following example:
+
+ for (i = 0; i < N; i++)
+ for (j = 0; j < N; j++)
+ use (a[j][i]);
+ the same cache line is accessed each N steps (except if the change from
+ i to i + 1 crosses the boundary of the cache line). Thus, for self-reuse,
+ we cannot rely purely on the results of the data dependence analysis.
+
+ Instead, we compute the stride of the reference in each loop, and consider
+ the innermost loop in that the stride is less than cache size. */
+
+ /* Per-loop byte strides of DR, zero-initialized; index N - 1 is the
+ innermost loop. */
+ strides = XCNEWVEC (HOST_WIDE_INT, n);
+ access_fns = DR_ACCESS_FNS (dr);
+
+ for (i = 0; VEC_iterate (tree, access_fns, i, access_fn); i++)
+ {
+ /* Keep track of the reference corresponding to the subscript, so that we
+ know its stride. */
+ while (handled_component_p (ref) && TREE_CODE (ref) != ARRAY_REF)
+ ref = TREE_OPERAND (ref, 0);
+
+ if (TREE_CODE (ref) == ARRAY_REF)
+ {
+ /* The byte stride of this subscript is the size of the array
+ element. */
+ stride = TYPE_SIZE_UNIT (TREE_TYPE (ref));
+ if (host_integerp (stride, 1))
+ astride = tree_low_cst (stride, 1);
+ else
+ /* Unknown element size; assume it spans a whole cache line. */
+ astride = L1_CACHE_LINE_SIZE;
+
+ ref = TREE_OPERAND (ref, 0);
+ }
+ else
+ astride = 1;
+
+ add_subscript_strides (access_fn, astride, strides, n, loop);
+ }
+
+ /* Scan from the innermost loop (i == N - 1) outward; stop at the first
+ loop in which the reference moves less than a cache line per iteration
+ (so the line is reused) and whose per-iteration volume is large enough
+ to matter against the nontemporal threshold. */
+ for (i = n; i-- > 0; )
+ {
+ unsigned HOST_WIDE_INT s;
+
+ /* Absolute value of the stride; direction does not affect reuse. */
+ s = strides[i] < 0 ? -strides[i] : strides[i];
+
+ if (s < (unsigned) L1_CACHE_LINE_SIZE
+ && (loop_sizes[i]
+ > (unsigned) (L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION)))
+ {
+ ret = loop_sizes[i];
+ break;
+ }
+ }
+
+ free (strides);
+ return ret;
+}
+
+/* Determines the distance till the first reuse of each reference in REFS
+ in the loop nest of LOOP. NO_OTHER_REFS is true if there are no other
+ memory references in the loop. Sets the reuse_distance and independent_p
+ fields of the references in REFS as a side effect. */
+
+static void
+determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
+ bool no_other_refs)
+{
+ struct loop *nest, *aloop;
+ VEC (data_reference_p, heap) *datarefs = NULL;
+ VEC (ddr_p, heap) *dependences = NULL;
+ struct mem_ref_group *gr;
+ struct mem_ref *ref, *refb;
+ VEC (loop_p, heap) *vloops = NULL;
+ unsigned *loop_data_size;
+ unsigned i, j, n;
+ unsigned volume, dist, adist;
+ HOST_WIDE_INT vol;
+ data_reference_p dr;
+ ddr_p dep;
+
+ /* Only handle innermost loops. */
+ if (loop->inner)
+ return;
+
+ /* Find the outermost loop of the loop nest of loop (we require that
+ there are no sibling loops inside the nest). */
+ nest = loop;
+ while (1)
+ {
+ aloop = loop_outer (nest);
+
+ /* Stop when we reach the root of the loop tree, or when the outer
+ loop has more than one child (a sibling of NEST exists). */
+ if (aloop == current_loops->tree_root
+ || aloop->inner->next)
+ break;
+
+ nest = aloop;
+ }
+
+ /* For each loop, determine the amount of data accessed in each iteration.
+ We use this to estimate whether the reference is evicted from the
+ cache before its reuse. */
+ find_loop_nest (nest, &vloops);
+ n = VEC_length (loop_p, vloops);
+ loop_data_size = XNEWVEC (unsigned, n);
+ volume = volume_of_references (refs);
+ /* Accumulate from the innermost loop (index N - 1) outward: each loop's
+ one-iteration volume is the inner volume times the inner loop's
+ iteration count. */
+ i = n;
+ while (i-- != 0)
+ {
+ loop_data_size[i] = volume;
+ /* Bound the volume by the L2 cache size, since above this bound,
+ all dependence distances are equivalent. */
+ if (volume > L2_CACHE_SIZE_BYTES)
+ continue;
+
+ aloop = VEC_index (loop_p, vloops, i);
+ vol = estimated_loop_iterations_int (aloop, false);
+ /* Fall back to the profile/heuristic estimate when the number of
+ iterations cannot be determined. */
+ if (vol < 0)
+ vol = expected_loop_iterations (aloop);
+ volume *= vol;
+ }
+
+ /* Prepare the references in the form suitable for data dependence
+ analysis. We ignore unanalyzable data references (the results
+ are used just as a heuristics to estimate temporality of the
+ references, hence we do not need to worry about correctness). */
+ for (gr = refs; gr; gr = gr->next)
+ for (ref = gr->refs; ref; ref = ref->next)
+ {
+ dr = create_data_ref (nest, ref->mem, ref->stmt, !ref->write_p);
+
+ if (dr)
+ {
+ /* Start pessimistically with the volume of the whole nest;
+ refined below. Stash the mem_ref in the aux field so we can
+ map data references back to our records. */
+ ref->reuse_distance = volume;
+ dr->aux = ref;
+ VEC_safe_push (data_reference_p, heap, datarefs, dr);
+ }
+ else
+ /* An unanalyzable reference may alias anything; references can no
+ longer be proven independent. */
+ no_other_refs = false;
+ }
+
+ /* Account for self-reuse of each reference (see self_reuse_distance). */
+ for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
+ {
+ dist = self_reuse_distance (dr, loop_data_size, n, loop);
+ ref = dr->aux;
+ if (ref->reuse_distance > dist)
+ ref->reuse_distance = dist;
+
+ if (no_other_refs)
+ ref->independent_p = true;
+ }
+
+ compute_all_dependences (datarefs, &dependences, vloops, true);
+
+ /* Refine the reuse distances using cross-reference dependences. */
+ for (i = 0; VEC_iterate (ddr_p, dependences, i, dep); i++)
+ {
+ /* Proven independent: no reuse between the two references. */
+ if (DDR_ARE_DEPENDENT (dep) == chrec_known)
+ continue;
+
+ ref = DDR_A (dep)->aux;
+ refb = DDR_B (dep)->aux;
+
+ if (DDR_ARE_DEPENDENT (dep) == chrec_dont_know
+ || DDR_NUM_DIST_VECTS (dep) == 0)
+ {
+ /* If the dependence cannot be analyzed, assume that there might be
+ a reuse. */
+ dist = 0;
+
+ ref->independent_p = false;
+ refb->independent_p = false;
+ }
+ else
+ {
+ /* The distance vectors are normalized to be always lexicographically
+ positive, hence we cannot tell just from them whether DDR_A comes
+ before DDR_B or vice versa. However, it is not important,
+ anyway -- if DDR_A is close to DDR_B, then it is either reused in
+ DDR_B (and it is not nontemporal), or it reuses the value of DDR_B
+ in cache (and marking it as nontemporal would not affect
+ anything). */
+
+ /* Take the minimum volume over all distance vectors. */
+ dist = volume;
+ for (j = 0; j < DDR_NUM_DIST_VECTS (dep); j++)
+ {
+ adist = volume_of_dist_vector (DDR_DIST_VECT (dep, j),
+ loop_data_size, n);
+
+ /* If this is a dependence in the innermost loop (i.e., the
+ distances in all superloops are zero) and it is not
+ the trivial self-dependence with distance zero, record that
+ the references are not completely independent. */
+ if (lambda_vector_zerop (DDR_DIST_VECT (dep, j), n - 1)
+ && (ref != refb
+ || DDR_DIST_VECT (dep, j)[n-1] != 0))
+ {
+ ref->independent_p = false;
+ refb->independent_p = false;
+ }
+
+ /* Ignore accesses closer than
+ L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION,
+ so that we use nontemporal prefetches e.g. if single memory
+ location is accessed several times in a single iteration of
+ the loop. */
+ if (adist < L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION)
+ continue;
+
+ if (adist < dist)
+ dist = adist;
+ }
+ }
+
+ /* The dependence shortens the reuse distance of both endpoints. */
+ if (ref->reuse_distance > dist)
+ ref->reuse_distance = dist;
+ if (refb->reuse_distance > dist)
+ refb->reuse_distance = dist;
+ }
+
+ free_dependence_relations (dependences);
+ free_data_refs (datarefs);
+ free (loop_data_size);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Reuse distances:\n");
+ for (gr = refs; gr; gr = gr->next)
+ for (ref = gr->refs; ref; ref = ref->next)
+ fprintf (dump_file, " ref %p distance %u\n",
+ (void *) ref, ref->reuse_distance);
+ }
+}
+