1 /* Perform various loop optimizations, including strength reduction.
2 Copyright (C) 1987, 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
3 1998, 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* This is the loop optimization pass of the compiler.
23 It finds invariant computations within loops and moves them
24 to the beginning of the loop. Then it identifies basic and
25 general induction variables.
27 Basic induction variables (BIVs) are pseudo registers that are set within
28 a loop only by incrementing or decrementing their value.  General induction
29 variables (GIVs) are pseudo registers whose value is a linear function
30 of a basic induction variable. BIVs are recognized by `basic_induction_var';
31 GIVs by `general_induction_var'.
33 Once induction variables are identified, strength reduction is applied to the
34 general induction variables, and induction variable elimination is applied to
35 the basic induction variables.
37 It also finds cases where
38 a register is set within the loop by zero-extending a narrower value
39 and changes these to zero the entire register once before the loop
40 and merely copy the low part within the loop.
42 Most of the complexity is in heuristics to decide when it is worth
43 while to do these things. */
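/* Illustrative example (added commentary, not from the original sources):
   in a loop such as

	for (i = 0; i < n; i++)
	  a[i] = 0;

   the counter `i' is a basic induction variable, and an address
   computation such as `a + i*4' is a general induction variable (a
   linear function of `i').  Strength reduction replaces the implied
   multiply by maintaining a pointer that is bumped by 4 on each
   iteration.  */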
47 #include "coretypes.h"
53 #include "hard-reg-set.h"
54 #include "basic-block.h"
55 #include "insn-config.h"
65 #include "insn-flags.h"
70 /* Not really meaningful values, but at least something. */
71 #ifndef SIMULTANEOUS_PREFETCHES
72 #define SIMULTANEOUS_PREFETCHES 3
74 #ifndef PREFETCH_BLOCK
75 #define PREFETCH_BLOCK 32
78 #define HAVE_prefetch 0
79 #define CODE_FOR_prefetch 0
80 #define gen_prefetch(a,b,c) (abort(), NULL_RTX)
83 /* Give up the prefetch optimizations once we exceed a given threshold.
84 It is unlikely that we would be able to optimize something in a loop
85 with so many detected prefetches. */
86 #define MAX_PREFETCHES 100
87 /* The number of prefetch blocks that are beneficial to fetch at once before
88 a loop with a known (and low) iteration count. */
89 #define PREFETCH_BLOCKS_BEFORE_LOOP_MAX 6
90 /* For very tiny loops it is not worthwhile to prefetch even before the loop,
91 since it is likely that the data are already in the cache. */
92 #define PREFETCH_BLOCKS_BEFORE_LOOP_MIN 2
94 /* Parameterize some prefetch heuristics so they can be turned on and off
95 easily for performance testing on new architectures. These can be
96 defined in target-dependent files. */
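/* For example, a target header might provide its own value, say

	#define PREFETCH_LOW_LOOPCNT 16

   (a hypothetical value, for illustration only); the #ifndef guards
   below then leave such a definition untouched.  */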
98 /* Prefetch is worthwhile only when loads/stores are dense. */
99 #ifndef PREFETCH_ONLY_DENSE_MEM
100 #define PREFETCH_ONLY_DENSE_MEM 1
103 /* Define what we mean by "dense" loads and stores; this value divided by 256
104 is the minimum fraction of memory references that are worth prefetching (220/256 is roughly 86%).  */
105 #ifndef PREFETCH_DENSE_MEM
106 #define PREFETCH_DENSE_MEM 220
109 /* Do not prefetch for a loop whose iteration count is known to be low. */
110 #ifndef PREFETCH_NO_LOW_LOOPCNT
111 #define PREFETCH_NO_LOW_LOOPCNT 1
114 /* Define what we mean by a "low" iteration count. */
115 #ifndef PREFETCH_LOW_LOOPCNT
116 #define PREFETCH_LOW_LOOPCNT 32
119 /* Do not prefetch for a loop that contains a function call; such a loop is
120 probably not an internal loop. */
121 #ifndef PREFETCH_NO_CALL
122 #define PREFETCH_NO_CALL 1
125 /* Do not prefetch accesses with an extreme stride. */
126 #ifndef PREFETCH_NO_EXTREME_STRIDE
127 #define PREFETCH_NO_EXTREME_STRIDE 1
130 /* Define what we mean by an "extreme" stride. */
131 #ifndef PREFETCH_EXTREME_STRIDE
132 #define PREFETCH_EXTREME_STRIDE 4096
135 /* Define a limit to how far apart indices can be and still be merged
136 into a single prefetch. */
137 #ifndef PREFETCH_EXTREME_DIFFERENCE
138 #define PREFETCH_EXTREME_DIFFERENCE 4096
141 /* Issue prefetch instructions before the loop to fetch data to be used
142 in the first few loop iterations. */
143 #ifndef PREFETCH_BEFORE_LOOP
144 #define PREFETCH_BEFORE_LOOP 1
147 /* Do not handle reversed order prefetches (negative stride). */
148 #ifndef PREFETCH_NO_REVERSE_ORDER
149 #define PREFETCH_NO_REVERSE_ORDER 1
152 /* Prefetch even if the GIV is in conditional code. */
153 #ifndef PREFETCH_CONDITIONAL
154 #define PREFETCH_CONDITIONAL 1
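/* Number of luids spanned between the first and last use of register
   REGNO.  (The LOOP argument is currently unused.)  */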
157 #define LOOP_REG_LIFETIME(LOOP, REGNO) \
158 ((REGNO_LAST_LUID (REGNO) - REGNO_FIRST_LUID (REGNO)))
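/* Nonzero if register REGNO is used before LOOP->start or after
   LOOP->end, i.e. its value is live outside the loop.  */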
160 #define LOOP_REG_GLOBAL_P(LOOP, REGNO) \
161 ((REGNO_LAST_LUID (REGNO) > INSN_LUID ((LOOP)->end) \
162 || REGNO_FIRST_LUID (REGNO) < INSN_LUID ((LOOP)->start)))
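/* Number of hard registers occupied by SET_DEST when REGNO is a hard
   register; 1 for a pseudo.  */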
164 #define LOOP_REGNO_NREGS(REGNO, SET_DEST) \
165 ((REGNO) < FIRST_PSEUDO_REGISTER \
166 ? (int) hard_regno_nregs[(REGNO)][GET_MODE (SET_DEST)] : 1)
169 /* Vector mapping INSN_UIDs to luids.
170 The luids are like uids but always increase monotonically.
171 We use them to see whether a jump comes from outside a given loop. */
175 /* Indexed by INSN_UID, contains the ordinal giving the (innermost) loop
176 number the insn is contained in. */
178 struct loop **uid_loop;
180 /* 1 + largest uid of any insn. */
182 int max_uid_for_loop;
184 /* Number of loops detected in current function. Used as index to the
187 static int max_loop_num;
189 /* Bound on pseudo register number before loop optimization.
190 A pseudo has valid regscan info if its number is < max_reg_before_loop. */
191 unsigned int max_reg_before_loop;
193 /* The value to pass to the next call of reg_scan_update. */
194 static int loop_max_reg;
196 /* During the analysis of a loop, a chain of `struct movable's
197 is made to record all the movable insns found.
198 Then the entire chain can be scanned to decide which to move. */
202 rtx insn; /* A movable insn */
203 rtx set_src; /* The expression this reg is set from. */
204 rtx set_dest; /* The destination of this SET. */
205 rtx dependencies; /* When INSN is libcall, this is an EXPR_LIST
206 of any registers used within the LIBCALL. */
207 int consec; /* Number of consecutive following insns
208 that must be moved with this one. */
209 unsigned int regno; /* The register it sets */
210 short lifetime; /* lifetime of that register;
211 may be adjusted when matching movables
212 that load the same value are found. */
213 short savings; /* Number of insns we can move for this reg,
214 including other movables that force this
215 or match this one. */
216 ENUM_BITFIELD(machine_mode) savemode : 8; /* Nonzero means it is a mode for
217 a low part that we should avoid changing when
218 clearing the rest of the reg. */
219 unsigned int cond : 1; /* 1 if only conditionally movable */
220 unsigned int force : 1; /* 1 means MUST move this insn */
221 unsigned int global : 1; /* 1 means reg is live outside this loop */
222 /* If PARTIAL is 1, GLOBAL means something different:
223 that the reg is live outside the range from where it is set
224 to the following label. */
225 unsigned int done : 1; /* 1 inhibits further processing of this */
227 unsigned int partial : 1; /* 1 means this reg is used for zero-extending.
228 In particular, moving it does not make it
230 unsigned int move_insn : 1; /* 1 means that we call emit_move_insn to
231 load SRC, rather than copying INSN. */
232 unsigned int move_insn_first:1;/* Same as above, if this is necessary for the
233 first insn of a consecutive sets group. */
234 unsigned int is_equiv : 1; /* 1 means a REG_EQUIV is present on INSN. */
235 unsigned int insert_temp : 1; /* 1 means we copy to a new pseudo and replace
236 the original insn with a copy from that
237 pseudo, rather than deleting it. */
238 struct movable *match; /* First entry for same value */
239 struct movable *forces; /* An insn that must be moved if this is */
240 struct movable *next;
244 FILE *loop_dump_stream;
246 /* Forward declarations. */
248 static void invalidate_loops_containing_label (rtx);
249 static void find_and_verify_loops (rtx, struct loops *);
250 static void mark_loop_jump (rtx, struct loop *);
251 static void prescan_loop (struct loop *);
252 static int reg_in_basic_block_p (rtx, rtx);
253 static int consec_sets_invariant_p (const struct loop *, rtx, int, rtx);
254 static int labels_in_range_p (rtx, int);
255 static void count_one_set (struct loop_regs *, rtx, rtx, rtx *);
256 static void note_addr_stored (rtx, rtx, void *);
257 static void note_set_pseudo_multiple_uses (rtx, rtx, void *);
258 static int loop_reg_used_before_p (const struct loop *, rtx, rtx);
259 static rtx find_regs_nested (rtx, rtx);
260 static void scan_loop (struct loop*, int);
262 static void replace_call_address (rtx, rtx, rtx);
264 static rtx skip_consec_insns (rtx, int);
265 static int libcall_benefit (rtx);
266 static rtx libcall_other_reg (rtx, rtx);
267 static void record_excess_regs (rtx, rtx, rtx *);
268 static void ignore_some_movables (struct loop_movables *);
269 static void force_movables (struct loop_movables *);
270 static void combine_movables (struct loop_movables *, struct loop_regs *);
271 static int num_unmoved_movables (const struct loop *);
272 static int regs_match_p (rtx, rtx, struct loop_movables *);
273 static int rtx_equal_for_loop_p (rtx, rtx, struct loop_movables *,
275 static void add_label_notes (rtx, rtx);
276 static void move_movables (struct loop *loop, struct loop_movables *, int,
278 static void loop_movables_add (struct loop_movables *, struct movable *);
279 static void loop_movables_free (struct loop_movables *);
280 static int count_nonfixed_reads (const struct loop *, rtx);
281 static void loop_bivs_find (struct loop *);
282 static void loop_bivs_init_find (struct loop *);
283 static void loop_bivs_check (struct loop *);
284 static void loop_givs_find (struct loop *);
285 static void loop_givs_check (struct loop *);
286 static int loop_biv_eliminable_p (struct loop *, struct iv_class *, int, int);
287 static int loop_giv_reduce_benefit (struct loop *, struct iv_class *,
288 struct induction *, rtx);
289 static void loop_givs_dead_check (struct loop *, struct iv_class *);
290 static void loop_givs_reduce (struct loop *, struct iv_class *);
291 static void loop_givs_rescan (struct loop *, struct iv_class *, rtx *);
292 static void loop_ivs_free (struct loop *);
293 static void strength_reduce (struct loop *, int);
294 static void find_single_use_in_loop (struct loop_regs *, rtx, rtx);
295 static int valid_initial_value_p (rtx, rtx, int, rtx);
296 static void find_mem_givs (const struct loop *, rtx, rtx, int, int);
297 static void record_biv (struct loop *, struct induction *, rtx, rtx, rtx,
298 rtx, rtx *, int, int);
299 static void check_final_value (const struct loop *, struct induction *);
300 static void loop_ivs_dump (const struct loop *, FILE *, int);
301 static void loop_iv_class_dump (const struct iv_class *, FILE *, int);
302 static void loop_biv_dump (const struct induction *, FILE *, int);
303 static void loop_giv_dump (const struct induction *, FILE *, int);
304 static void record_giv (const struct loop *, struct induction *, rtx, rtx,
305 rtx, rtx, rtx, rtx, int, enum g_types, int, int,
307 static void update_giv_derive (const struct loop *, rtx);
308 static void check_ext_dependent_givs (const struct loop *, struct iv_class *);
309 static int basic_induction_var (const struct loop *, rtx, enum machine_mode,
310 rtx, rtx, rtx *, rtx *, rtx **);
311 static rtx simplify_giv_expr (const struct loop *, rtx, rtx *, int *);
312 static int general_induction_var (const struct loop *loop, rtx, rtx *, rtx *,
313 rtx *, rtx *, int, int *, enum machine_mode);
314 static int consec_sets_giv (const struct loop *, int, rtx, rtx, rtx, rtx *,
315 rtx *, rtx *, rtx *);
316 static int check_dbra_loop (struct loop *, int);
317 static rtx express_from_1 (rtx, rtx, rtx);
318 static rtx combine_givs_p (struct induction *, struct induction *);
319 static int cmp_combine_givs_stats (const void *, const void *);
320 static void combine_givs (struct loop_regs *, struct iv_class *);
321 static int product_cheap_p (rtx, rtx);
322 static int maybe_eliminate_biv (const struct loop *, struct iv_class *, int,
324 static int maybe_eliminate_biv_1 (const struct loop *, rtx, rtx,
325 struct iv_class *, int, basic_block, rtx);
326 static int last_use_this_basic_block (rtx, rtx);
327 static void record_initial (rtx, rtx, void *);
328 static void update_reg_last_use (rtx, rtx);
329 static rtx next_insn_in_loop (const struct loop *, rtx);
330 static void loop_regs_scan (const struct loop *, int);
331 static int count_insns_in_loop (const struct loop *);
332 static int find_mem_in_note_1 (rtx *, void *);
333 static rtx find_mem_in_note (rtx);
334 static void load_mems (const struct loop *);
335 static int insert_loop_mem (rtx *, void *);
336 static int replace_loop_mem (rtx *, void *);
337 static void replace_loop_mems (rtx, rtx, rtx, int);
338 static int replace_loop_reg (rtx *, void *);
339 static void replace_loop_regs (rtx insn, rtx, rtx);
340 static void note_reg_stored (rtx, rtx, void *);
341 static void try_copy_prop (const struct loop *, rtx, unsigned int);
342 static void try_swap_copy_prop (const struct loop *, rtx, unsigned int);
343 static rtx check_insn_for_givs (struct loop *, rtx, int, int);
344 static rtx check_insn_for_bivs (struct loop *, rtx, int, int);
345 static rtx gen_add_mult (rtx, rtx, rtx, rtx);
346 static void loop_regs_update (const struct loop *, rtx);
347 static int iv_add_mult_cost (rtx, rtx, rtx, rtx);
349 static rtx loop_insn_emit_after (const struct loop *, basic_block, rtx, rtx);
350 static rtx loop_call_insn_emit_before (const struct loop *, basic_block,
352 static rtx loop_call_insn_hoist (const struct loop *, rtx);
353 static rtx loop_insn_sink_or_swim (const struct loop *, rtx);
355 static void loop_dump_aux (const struct loop *, FILE *, int);
356 static void loop_delete_insns (rtx, rtx);
357 static HOST_WIDE_INT remove_constant_addition (rtx *);
358 static rtx gen_load_of_final_value (rtx, rtx);
359 void debug_ivs (const struct loop *);
360 void debug_iv_class (const struct iv_class *);
361 void debug_biv (const struct induction *);
362 void debug_giv (const struct induction *);
363 void debug_loop (const struct loop *);
364 void debug_loops (const struct loops *);
366 typedef struct loop_replace_args
373 /* Nonzero iff INSN is between START and END, inclusive. */
374 #define INSN_IN_RANGE_P(INSN, START, END) \
375 (INSN_UID (INSN) < max_uid_for_loop \
376 && INSN_LUID (INSN) >= INSN_LUID (START) \
377 && INSN_LUID (INSN) <= INSN_LUID (END))
379 /* Indirect_jump_in_function is computed once per function. */
380 static int indirect_jump_in_function;
381 static int indirect_jump_in_function_p (rtx);
383 static int compute_luids (rtx, rtx, int);
385 static int biv_elimination_giv_has_0_offset (struct induction *,
386 struct induction *, rtx);
388 /* Benefit penalty charged when a giv is not replaceable, i.e. we must emit an
389 insn to copy the value of the strength-reduced giv to its original register.  */
390 static int copy_cost;
392 /* Cost of using a register, to normalize the benefits of a giv. */
393 static int reg_address_cost;
398 rtx reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
400 reg_address_cost = address_cost (reg, SImode);
402 copy_cost = COSTS_N_INSNS (1);
405 /* Compute the mapping from uids to luids.
406 LUIDs are numbers assigned to insns, like uids,
407 except that luids increase monotonically through the code.
408 Start at insn START and stop just before END. Assign LUIDs
409 starting with PREV_LUID + 1. Return the last assigned LUID + 1. */
411 compute_luids (rtx start, rtx end, int prev_luid)
416 for (insn = start, i = prev_luid; insn != end; insn = NEXT_INSN (insn))
418 if (INSN_UID (insn) >= max_uid_for_loop)
420 /* Don't assign luids to line-number NOTEs, so that the distance in
421 luids between two insns is not affected by -g. */
423 || NOTE_LINE_NUMBER (insn) <= 0)
424 uid_luid[INSN_UID (insn)] = ++i;
426 /* Give a line number note the same luid as preceding insn. */
427 uid_luid[INSN_UID (insn)] = i;
432 /* Entry point of this file. Perform loop optimization
433 on the current function. F is the first insn of the function
434 and DUMPFILE is a stream for output of a trace of actions taken
435 (or 0 if none should be output). */
438 loop_optimize (rtx f, FILE *dumpfile, int flags)
442 struct loops loops_data;
443 struct loops *loops = &loops_data;
444 struct loop_info *loops_info;
446 loop_dump_stream = dumpfile;
448 init_recog_no_volatile ();
450 max_reg_before_loop = max_reg_num ();
451 loop_max_reg = max_reg_before_loop;
455 /* Count the number of loops. */
458 for (insn = f; insn; insn = NEXT_INSN (insn))
461 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
465 /* Don't waste time if no loops. */
466 if (max_loop_num == 0)
469 loops->num = max_loop_num;
471 /* Get size to use for tables indexed by uids.
472 Leave some space for labels allocated by find_and_verify_loops. */
473 max_uid_for_loop = get_max_uid () + 1 + max_loop_num * 32;
475 uid_luid = xcalloc (max_uid_for_loop, sizeof (int));
476 uid_loop = xcalloc (max_uid_for_loop, sizeof (struct loop *));
478 /* Allocate storage for array of loops. */
479 loops->array = xcalloc (loops->num, sizeof (struct loop));
481 /* Find and process each loop.
482 First, find them, and record them in order of their beginnings. */
483 find_and_verify_loops (f, loops);
485 /* Allocate and initialize auxiliary loop information. */
486 loops_info = xcalloc (loops->num, sizeof (struct loop_info));
487 for (i = 0; i < (int) loops->num; i++)
488 loops->array[i].aux = loops_info + i;
490 /* Now find all register lifetimes. This must be done after
491 find_and_verify_loops, because it might reorder the insns in the
493 reg_scan (f, max_reg_before_loop, 1);
495 /* This must occur after reg_scan so that registers created by gcse
496 will have entries in the register tables.
498 We could have added a call to reg_scan after gcse_main in toplev.c,
499 but moving this call to init_alias_analysis is more efficient. */
500 init_alias_analysis ();
502 /* See if we went too far.  Note that get_max_uid already returns
503 one more than the maximum uid of all insns.  */
504 if (get_max_uid () > max_uid_for_loop)
506 /* Now reset it to the actual size we need. See above. */
507 max_uid_for_loop = get_max_uid ();
509 /* find_and_verify_loops has already called compute_luids, but it
510 might have rearranged code afterwards, so we need to recompute
512 compute_luids (f, NULL_RTX, 0);
514 /* Don't leave gaps in uid_luid for insns that have been
515 deleted. It is possible that the first or last insn
516 using some register has been deleted by cross-jumping.
517 Make sure that uid_luid for that former insn's uid
518 points to the general area where that insn used to be. */
519 for (i = 0; i < max_uid_for_loop; i++)
521 uid_luid[0] = uid_luid[i];
522 if (uid_luid[0] != 0)
525 for (i = 0; i < max_uid_for_loop; i++)
526 if (uid_luid[i] == 0)
527 uid_luid[i] = uid_luid[i - 1];
529 /* Determine if the function has an indirect jump.  On some systems
530 this prevents low-overhead loop instructions from being used.  */
531 indirect_jump_in_function = indirect_jump_in_function_p (f);
533 /* Now scan the loops, last ones first, since this means inner ones are done
534 before outer ones. */
535 for (i = max_loop_num - 1; i >= 0; i--)
537 struct loop *loop = &loops->array[i];
539 if (! loop->invalid && loop->end)
541 scan_loop (loop, flags);
546 end_alias_analysis ();
549 for (i = 0; i < (int) loops->num; i++)
550 free (loops_info[i].mems);
558 /* Returns the next insn, in execution order, after INSN. START and
559 END are the NOTE_INSN_LOOP_BEG and NOTE_INSN_LOOP_END for the loop,
560 respectively. LOOP->TOP, if non-NULL, is the top of the loop in the
561 insn-stream; it is used with loops that are entered near the
565 next_insn_in_loop (const struct loop *loop, rtx insn)
567 insn = NEXT_INSN (insn);
569 if (insn == loop->end)
572 /* Go to the top of the loop, and continue there. */
579 if (insn == loop->scan_start)
586 /* Find any register references hidden inside X and add them to
587 the dependency list DEPS. This is used to look inside CLOBBER (MEM
588 when checking whether a PARALLEL can be pulled out of a loop. */
591 find_regs_nested (rtx deps, rtx x)
593 enum rtx_code code = GET_CODE (x);
595 deps = gen_rtx_EXPR_LIST (VOIDmode, x, deps);
598 const char *fmt = GET_RTX_FORMAT (code);
600 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
603 deps = find_regs_nested (deps, XEXP (x, i));
604 else if (fmt[i] == 'E')
605 for (j = 0; j < XVECLEN (x, i); j++)
606 deps = find_regs_nested (deps, XVECEXP (x, i, j));
612 /* Optimize one loop described by LOOP. */
614 /* ??? Could also move memory writes out of loops if the destination address
615 is invariant, the source is invariant, the memory write is not volatile,
616 and if we can prove that no read inside the loop can read this address
617 before the write occurs. If there is a read of this address after the
618 write, then we can also mark the memory read as invariant. */
621 scan_loop (struct loop *loop, int flags)
623 struct loop_info *loop_info = LOOP_INFO (loop);
624 struct loop_regs *regs = LOOP_REGS (loop);
626 rtx loop_start = loop->start;
627 rtx loop_end = loop->end;
629 /* 1 if we are scanning insns that could be executed zero times. */
631 /* 1 if we are scanning insns that might never be executed
632 due to a subroutine call which might exit before they are reached. */
634 /* Number of insns in the loop. */
637 rtx temp, update_start, update_end;
638 /* The SET from an insn, if it is the only SET in the insn. */
640 /* Chain describing insns movable in current loop. */
641 struct loop_movables *movables = LOOP_MOVABLES (loop);
642 /* Ratio of extra register life span we can justify
643 for saving an instruction. More if loop doesn't call subroutines
644 since in that case saving an insn makes more difference
645 and more registers are available. */
654 /* Determine whether this loop starts with a jump down to a test at
655 the end. This will occur for a small number of loops with a test
656 that is too complex to duplicate in front of the loop.
658 We search for the first insn or label in the loop, skipping NOTEs.
659 However, we must be careful not to skip past a NOTE_INSN_LOOP_BEG
660 (because we might have a loop executed only once that contains a
661 loop which starts with a jump to its exit test) or a NOTE_INSN_LOOP_END
662 (in case we have a degenerate loop).
664 Note that if we mistakenly think that a loop is entered at the top
665 when, in fact, it is entered at the exit test, the only effect will be
666 slightly poorer optimization. Making the opposite error can generate
667 incorrect code. Since very few loops now start with a jump to the
668 exit test, the code here to detect that case is very conservative. */
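/* Schematically (illustration only), such a rotated loop looks like

	NOTE_INSN_LOOP_BEG
	jump to TEST
     TOP:
	loop body
     TEST:
	conditional jump back to TOP
	NOTE_INSN_LOOP_END

   so the first real insn after LOOP_START is an unconditional jump.  */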
670 for (p = NEXT_INSN (loop_start);
672 && !LABEL_P (p) && ! INSN_P (p)
674 || (NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_BEG
675 && NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_END));
679 loop->scan_start = p;
681 /* If loop end is the end of the current function, then emit a
682 NOTE_INSN_DELETED after loop_end and set loop->sink to the dummy
683 note insn. This is the position we use when sinking insns out of
685 if (NEXT_INSN (loop->end) != 0)
686 loop->sink = NEXT_INSN (loop->end);
688 loop->sink = emit_note_after (NOTE_INSN_DELETED, loop->end);
690 /* Set up variables describing this loop. */
692 threshold = (loop_info->has_call ? 1 : 2) * (1 + n_non_fixed_regs);
694 /* If loop has a jump before the first label,
695 the true entry is the target of that jump.
696 Start scan from there.
697 But record in LOOP->TOP the place where the end-test jumps
698 back to so we can scan that after the end of the loop. */
700 /* Loop entry must be an unconditional jump (and not a RETURN).  */
701 && any_uncondjump_p (p)
702 && JUMP_LABEL (p) != 0
703 /* Check to see whether the jump actually
704 jumps out of the loop (meaning it's no loop).
705 This case can happen for things like
706 do {..} while (0). If this label was generated previously
707 by loop, we can't tell anything about it and have to reject
709 && INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, loop_end))
711 loop->top = next_label (loop->scan_start);
712 loop->scan_start = JUMP_LABEL (p);
715 /* If LOOP->SCAN_START was an insn created by loop, we don't know its luid
716 as required by loop_reg_used_before_p. So skip such loops. (This
717 test may never be true, but it's best to play it safe.)
719 Also, skip loops where we do not start scanning at a label. This
720 test also rejects loops starting with a JUMP_INSN that failed the
723 if (INSN_UID (loop->scan_start) >= max_uid_for_loop
724 || !LABEL_P (loop->scan_start))
726 if (loop_dump_stream)
727 fprintf (loop_dump_stream, "\nLoop from %d to %d is phony.\n\n",
728 INSN_UID (loop_start), INSN_UID (loop_end));
732 /* Allocate extra space for REGs that might be created by load_mems.
733 We allocate a little extra slop as well, in the hopes that we
734 won't have to reallocate the regs array. */
735 loop_regs_scan (loop, loop_info->mems_idx + 16);
736 insn_count = count_insns_in_loop (loop);
738 if (loop_dump_stream)
739 fprintf (loop_dump_stream, "\nLoop from %d to %d: %d real insns.\n",
740 INSN_UID (loop_start), INSN_UID (loop_end), insn_count);
742 /* Scan through the loop finding insns that are safe to move.
743 Set REGS->ARRAY[I].SET_IN_LOOP negative for the reg I being set, so that
744 this reg will be considered invariant for subsequent insns.
745 We consider whether subsequent insns use the reg
746 in deciding whether it is worth actually moving.
748 MAYBE_NEVER is nonzero if we have passed a conditional jump insn
749 and therefore it is possible that the insns we are scanning
750 would never be executed. At such times, we must make sure
751 that it is safe to execute the insn once instead of zero times.
752 When MAYBE_NEVER is 0, all insns will be executed at least once
753 so that is not a problem. */
755 for (in_libcall = 0, p = next_insn_in_loop (loop, loop->scan_start);
757 p = next_insn_in_loop (loop, p))
759 if (in_libcall && INSN_P (p) && find_reg_note (p, REG_RETVAL, NULL_RTX))
761 if (NONJUMP_INSN_P (p))
763 temp = find_reg_note (p, REG_LIBCALL, NULL_RTX);
767 && (set = single_set (p))
768 && REG_P (SET_DEST (set))
769 #ifdef PIC_OFFSET_TABLE_REG_CALL_CLOBBERED
770 && SET_DEST (set) != pic_offset_table_rtx
772 && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
778 rtx src = SET_SRC (set);
779 rtx dependencies = 0;
781 /* Figure out what to use as a source of this insn. If a
782 REG_EQUIV note is given or if a REG_EQUAL note with a
783 constant operand is specified, use it as the source and
784 mark that we should move this insn by calling
785 emit_move_insn rather than duplicating the insn.
787 Otherwise, only use the REG_EQUAL contents if a REG_RETVAL
789 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
791 src = XEXP (temp, 0), move_insn = 1;
794 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
795 if (temp && CONSTANT_P (XEXP (temp, 0)))
796 src = XEXP (temp, 0), move_insn = 1;
797 if (temp && find_reg_note (p, REG_RETVAL, NULL_RTX))
799 src = XEXP (temp, 0);
800 /* A libcall block can use regs that don't appear in
801 the equivalent expression. To move the libcall,
802 we must move those regs too. */
803 dependencies = libcall_other_reg (p, src);
807 /* For parallels, add any possible uses to the dependencies, as
808 we can't move the insn without resolving them first.
809 MEMs inside CLOBBERs may also reference registers; these
810 count as implicit uses. */
811 if (GET_CODE (PATTERN (p)) == PARALLEL)
813 for (i = 0; i < XVECLEN (PATTERN (p), 0); i++)
815 rtx x = XVECEXP (PATTERN (p), 0, i);
816 if (GET_CODE (x) == USE)
818 = gen_rtx_EXPR_LIST (VOIDmode, XEXP (x, 0),
820 else if (GET_CODE (x) == CLOBBER
821 && MEM_P (XEXP (x, 0)))
822 dependencies = find_regs_nested (dependencies,
823 XEXP (XEXP (x, 0), 0));
827 if (/* The register is used in basic blocks other
828 than the one where it is set (meaning that
829 something after this point in the loop might
830 depend on its value before the set). */
831 ! reg_in_basic_block_p (p, SET_DEST (set))
832 /* And the set is not guaranteed to be executed once
833 the loop starts, or the value before the set is
834 needed before the set occurs...
836 ??? Note we have quadratic behavior here, mitigated
837 by the fact that the previous test will often fail for
838 large loops. Rather than re-scanning the entire loop
839 each time for register usage, we should build tables
840 of the register usage and use them here instead. */
842 || loop_reg_used_before_p (loop, set, p)))
843 /* It is unsafe to move the set. However, it may be OK to
844 move the source into a new pseudo, and substitute a
845 reg-to-reg copy for the original insn.
847 This code used to consider it OK to move a set of a variable
848 which was not created by the user and not used in an exit
850 That behavior is incorrect and was removed. */
853 /* Don't try to optimize a MODE_CC set with a constant
854 source. It probably will be combined with a conditional
856 if (GET_MODE_CLASS (GET_MODE (SET_DEST (set))) == MODE_CC
859 /* Don't try to optimize a register that was made
860 by loop-optimization for an inner loop.
861 We don't know its life-span, so we can't compute
863 else if (REGNO (SET_DEST (set)) >= max_reg_before_loop)
865 /* Don't move the source and add a reg-to-reg copy:
866 - with -Os (this certainly increases size),
867 - if the mode doesn't support copy operations (obviously),
868 - if the source is already a reg (the motion will gain nothing),
869 - if the source is a legitimate constant (likewise). */
872 || ! can_copy_p (GET_MODE (SET_SRC (set)))
873 || REG_P (SET_SRC (set))
874 || (CONSTANT_P (SET_SRC (set))
875 && LEGITIMATE_CONSTANT_P (SET_SRC (set)))))
877 else if ((tem = loop_invariant_p (loop, src))
878 && (dependencies == 0
880 = loop_invariant_p (loop, dependencies)) != 0)
881 && (regs->array[REGNO (SET_DEST (set))].set_in_loop == 1
883 = consec_sets_invariant_p
884 (loop, SET_DEST (set),
885 regs->array[REGNO (SET_DEST (set))].set_in_loop,
887 /* If the insn can cause a trap (such as divide by zero),
888 can't move it unless it's guaranteed to be executed
889 once loop is entered. Even a function call might
890 prevent the trap insn from being reached
891 (since it might exit!) */
892 && ! ((maybe_never || call_passed)
893 && may_trap_p (src)))
896 int regno = REGNO (SET_DEST (set));
898 /* A potential lossage is the case where two insns
899 can be combined as long as they are both in the loop, but
900 we move one of them outside the loop. For large loops,
901 this can lose. The most common case of this is the address
902 of a function being called.
904 Therefore, if this register is marked as being used
905 exactly once if we are in a loop with calls
906 (a "large loop"), see if we can replace the usage of
907 this register with the source of this SET. If we can,
910 Don't do this if P has a REG_RETVAL note or if we have
911 SMALL_REGISTER_CLASSES and SET_SRC is a hard register. */
913 if (loop_info->has_call
914 && regs->array[regno].single_usage != 0
915 && regs->array[regno].single_usage != const0_rtx
916 && REGNO_FIRST_UID (regno) == INSN_UID (p)
917 && (REGNO_LAST_UID (regno)
918 == INSN_UID (regs->array[regno].single_usage))
919 && regs->array[regno].set_in_loop == 1
920 && GET_CODE (SET_SRC (set)) != ASM_OPERANDS
921 && ! side_effects_p (SET_SRC (set))
922 && ! find_reg_note (p, REG_RETVAL, NULL_RTX)
923 && (! SMALL_REGISTER_CLASSES
924 || (! (REG_P (SET_SRC (set))
925 && (REGNO (SET_SRC (set))
926 < FIRST_PSEUDO_REGISTER))))
927 && regno >= FIRST_PSEUDO_REGISTER
928 /* This test is not redundant; SET_SRC (set) might be
929 a call-clobbered register and the life of REGNO
930 might span a call. */
931 && ! modified_between_p (SET_SRC (set), p,
932 regs->array[regno].single_usage)
933 && no_labels_between_p (p,
934 regs->array[regno].single_usage)
935 && validate_replace_rtx (SET_DEST (set), SET_SRC (set),
936 regs->array[regno].single_usage))
938 /* Replace any usage in a REG_EQUAL note. Must copy
939 the new source, so that we don't get rtx sharing
940 between the SET_SOURCE and REG_NOTES of insn p. */
941 REG_NOTES (regs->array[regno].single_usage)
943 (REG_NOTES (regs->array[regno].single_usage),
944 SET_DEST (set), copy_rtx (SET_SRC (set))));
947 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
949 regs->array[regno+i].set_in_loop = 0;
953 m = xmalloc (sizeof (struct movable));
957 m->dependencies = dependencies;
958 m->set_dest = SET_DEST (set);
961 = regs->array[REGNO (SET_DEST (set))].set_in_loop - 1;
965 m->move_insn = move_insn;
966 m->move_insn_first = 0;
967 m->insert_temp = insert_temp;
968 m->is_equiv = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
969 m->savemode = VOIDmode;
971 /* Set M->cond if either loop_invariant_p
972 or consec_sets_invariant_p returned 2
973 (only conditionally invariant). */
974 m->cond = ((tem | tem1 | tem2) > 1);
975 m->global = LOOP_REG_GLOBAL_P (loop, regno);
977 m->lifetime = LOOP_REG_LIFETIME (loop, regno);
978 m->savings = regs->array[regno].n_times_set;
979 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
980 m->savings += libcall_benefit (p);
981 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); i++)
982 regs->array[regno+i].set_in_loop = move_insn ? -2 : -1;
983 /* Add M to the end of the chain MOVABLES. */
984 loop_movables_add (movables, m);
988 /* It is possible for the first instruction to have a
989 REG_EQUAL note but a non-invariant SET_SRC, so we must
990 remember the status of the first instruction in case
991 the last instruction doesn't have a REG_EQUAL note. */
992 m->move_insn_first = m->move_insn;
994 /* Skip this insn, not checking REG_LIBCALL notes. */
995 p = next_nonnote_insn (p);
996 /* Skip the consecutive insns, if there are any. */
997 p = skip_consec_insns (p, m->consec);
998 /* Back up to the last insn of the consecutive group. */
999 p = prev_nonnote_insn (p);
1001 /* We must now reset m->move_insn, m->is_equiv, and
1002 possibly m->set_src to correspond to the effects of
1004 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
1006 m->set_src = XEXP (temp, 0), m->move_insn = 1;
1009 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
1010 if (temp && CONSTANT_P (XEXP (temp, 0)))
1011 m->set_src = XEXP (temp, 0), m->move_insn = 1;
1017 = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
1020 /* If this register is always set within a STRICT_LOW_PART
1021 or set to zero, then its high bytes are constant.
1022 So clear them outside the loop and within the loop
1023 just load the low bytes.
1024 We must check that the machine has an instruction to do so.
1025 Also, if the value loaded into the register
1026 depends on the same register, this cannot be done. */
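/* In RTL terms we are looking for a pair of insns of the form
   (illustration only):

	(set (reg:SI R) (const_int 0))
	(set (strict_low_part (subreg:HI (reg:SI R) 0)) ...)

   where the second SET_SRC does not mention R.  */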
1027 else if (SET_SRC (set) == const0_rtx
1028 && NONJUMP_INSN_P (NEXT_INSN (p))
1029 && (set1 = single_set (NEXT_INSN (p)))
1030 && GET_CODE (set1) == SET
1031 && (GET_CODE (SET_DEST (set1)) == STRICT_LOW_PART)
1032 && (GET_CODE (XEXP (SET_DEST (set1), 0)) == SUBREG)
1033 && (SUBREG_REG (XEXP (SET_DEST (set1), 0))
1035 && !reg_mentioned_p (SET_DEST (set), SET_SRC (set1)))
1037 int regno = REGNO (SET_DEST (set));
1038 if (regs->array[regno].set_in_loop == 2)
1041 m = xmalloc (sizeof (struct movable));
1044 m->set_dest = SET_DEST (set);
1045 m->dependencies = 0;
1051 m->move_insn_first = 0;
1052 m->insert_temp = insert_temp;
1054 /* If the insn may not be executed on some cycles,
1055 we can't clear the whole reg; clear just high part.
1056 Not even if the reg is used only within this loop.
1063 Clearing x before the inner loop could clobber a value
1064 being saved from the last time around the outer loop.
1065 However, if the reg is not used outside this loop
1066 and all uses of the register are in the same
1067 basic block as the store, there is no problem.
1069 If this insn was made by loop, we don't know its
1070 INSN_LUID and hence must make a conservative
1072 m->global = (INSN_UID (p) >= max_uid_for_loop
1073 || LOOP_REG_GLOBAL_P (loop, regno)
1074 || (labels_in_range_p
1075 (p, REGNO_FIRST_LUID (regno))));
1076 if (maybe_never && m->global)
1077 m->savemode = GET_MODE (SET_SRC (set1));
1079 m->savemode = VOIDmode;
1083 m->lifetime = LOOP_REG_LIFETIME (loop, regno);
1086 i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
1088 regs->array[regno+i].set_in_loop = -1;
1089 /* Add M to the end of the chain MOVABLES. */
1090 loop_movables_add (movables, m);
1095 /* Past a call insn, we get to insns which might not be executed
1096 because the call might exit. This matters for insns that trap.
1097 Constant and pure call insns always return, so they don't count. */
1098 else if (CALL_P (p) && ! CONST_OR_PURE_CALL_P (p))
1100 /* Past a label or a jump, we get to insns for which we
1101 can't count on whether or how many times they will be
1102 executed during each iteration. Therefore, we can
1103 only move out sets of trivial variables
1104 (those not used after the loop). */
1105 /* Similar code appears twice in strength_reduce. */
1106 else if ((LABEL_P (p) || JUMP_P (p))
1107 /* If we enter the loop in the middle, and scan around to the
1108 beginning, don't set maybe_never for that. This must be an
1109 unconditional jump, otherwise the code at the top of the
1110 loop might never be executed. Unconditional jumps are
1111 followed by a barrier then the loop_end. */
1112 && ! (JUMP_P (p) && JUMP_LABEL (p) == loop->top
1113 && NEXT_INSN (NEXT_INSN (p)) == loop_end
1114 && any_uncondjump_p (p)))
1118 /* If one movable subsumes another, ignore that other. */
1120 ignore_some_movables (movables);
1122 /* For each movable insn, see if the reg that it loads
1123 dies right where it feeds into another conditionally movable insn.
1124 If so, record that the second insn "forces" the first one,
1125 since the second can be moved only if the first is. */
1127 force_movables (movables);
1129 /* See if there are multiple movable insns that load the same value.
1130 If there are, make all but the first point at the first one
1131 through the `match' field, and add the priorities of them
1132 all together as the priority of the first. */
1134 combine_movables (movables, regs);
1136 /* Now consider each movable insn to decide whether it is worth moving.
1137 Store 0 in regs->array[I].set_in_loop for each reg I that is moved.
1139 For machines with few registers this increases code size, so do not
1140 move movables when optimizing for code size on such machines.
1141 (The 18 below is the value for i386.) */
1144 || (reg_class_size[GENERAL_REGS] > 18 && !loop_info->has_call))
1146 move_movables (loop, movables, threshold, insn_count);
1148 /* Recalculate regs->array if move_movables has created new
1150 if (max_reg_num () > regs->num)
1152 loop_regs_scan (loop, 0);
1153 for (update_start = loop_start;
1154 PREV_INSN (update_start)
1155 && !LABEL_P (PREV_INSN (update_start));
1156 update_start = PREV_INSN (update_start))
1158 update_end = NEXT_INSN (loop_end);
1160 reg_scan_update (update_start, update_end, loop_max_reg);
1161 loop_max_reg = max_reg_num ();
1165 /* Now candidates that still are negative are those not moved.
1166 Change regs->array[I].set_in_loop to indicate that those are not actually
1168 for (i = 0; i < regs->num; i++)
1169 if (regs->array[i].set_in_loop < 0)
1170 regs->array[i].set_in_loop = regs->array[i].n_times_set;
1172 /* Now that we've moved some things out of the loop, we might be able to
1173 hoist even more memory references. */
1176 /* Recalculate regs->array if load_mems has created new registers. */
1177 if (max_reg_num () > regs->num)
1178 loop_regs_scan (loop, 0);
1180 for (update_start = loop_start;
1181 PREV_INSN (update_start)
1182 && !LABEL_P (PREV_INSN (update_start));
1183 update_start = PREV_INSN (update_start))
1185 update_end = NEXT_INSN (loop_end);
1187 reg_scan_update (update_start, update_end, loop_max_reg);
1188 loop_max_reg = max_reg_num ();
1190 if (flag_strength_reduce)
1192 if (update_end && LABEL_P (update_end))
1193 /* Ensure our label doesn't go away. */
1194 LABEL_NUSES (update_end)++;
1196 strength_reduce (loop, flags);
1198 reg_scan_update (update_start, update_end, loop_max_reg);
1199 loop_max_reg = max_reg_num ();
1201 if (update_end && LABEL_P (update_end)
1202 && --LABEL_NUSES (update_end) == 0)
1203 delete_related_insns (update_end);
1207 /* The movable information is required for strength reduction. */
1208 loop_movables_free (movables);
1215 /* Add elements to *OUTPUT to record all the pseudo-regs
1216 mentioned in IN_THIS but not mentioned in NOT_IN_THIS. */
1219 record_excess_regs (rtx in_this, rtx not_in_this, rtx *output)
1225 code = GET_CODE (in_this);
1239 if (REGNO (in_this) >= FIRST_PSEUDO_REGISTER
1240 && ! reg_mentioned_p (in_this, not_in_this))
1241 *output = gen_rtx_EXPR_LIST (VOIDmode, in_this, *output);
1248 fmt = GET_RTX_FORMAT (code);
1249 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1256 for (j = 0; j < XVECLEN (in_this, i); j++)
1257 record_excess_regs (XVECEXP (in_this, i, j), not_in_this, output);
1261 record_excess_regs (XEXP (in_this, i), not_in_this, output);
1267 /* Check what regs are referred to in the libcall block ending with INSN,
1268 aside from those mentioned in the equivalent value.
1269 If there are none, return 0.
1270 If there are one or more, return an EXPR_LIST containing all of them. */
1273 libcall_other_reg (rtx insn, rtx equiv)
1275 rtx note = find_reg_note (insn, REG_RETVAL, NULL_RTX);
1276 rtx p = XEXP (note, 0);
1279 /* First, find all the regs used in the libcall block
1280 that are not mentioned as inputs to the result. */
1285 record_excess_regs (PATTERN (p), equiv, &output);
1292 /* Return 1 if all uses of REG
1293 are between INSN and the end of the basic block. */
1296 reg_in_basic_block_p (rtx insn, rtx reg)
1298 int regno = REGNO (reg);
1301 if (REGNO_FIRST_UID (regno) != INSN_UID (insn))
1304 /* Search this basic block for the already recorded last use of the reg. */
1305 for (p = insn; p; p = NEXT_INSN (p))
1307 switch (GET_CODE (p))
1314 /* Ordinary insn: if this is the last use, we win. */
1315 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1320 /* Jump insn: if this is the last use, we win. */
1321 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1323 /* Otherwise, it's the end of the basic block, so we lose. */
1328 /* It's the end of the basic block, so we lose. */
1336 /* The "last use" that was recorded can't be found after the first
1337 use. This can happen when the last use was deleted while
1338 processing an inner loop, this inner loop was then completely
1339 unrolled, and the outer loop is always exited after the inner loop,
1340 so that everything after the first use becomes a single basic block. */
1344 /* Compute the benefit of eliminating the insns in the block whose
1345 last insn is LAST. This may be a group of insns used to compute a
1346 value directly or may contain a library call.  */
1349 libcall_benefit (rtx last)
1354 for (insn = XEXP (find_reg_note (last, REG_RETVAL, NULL_RTX), 0);
1355 insn != last; insn = NEXT_INSN (insn))
1358 benefit += 10; /* Assume at least this many insns in a library
1360 else if (NONJUMP_INSN_P (insn)
1361 && GET_CODE (PATTERN (insn)) != USE
1362 && GET_CODE (PATTERN (insn)) != CLOBBER)
1369 /* Skip COUNT insns from INSN, counting library calls as 1 insn. */
1372 skip_consec_insns (rtx insn, int count)
1374 for (; count > 0; count--)
1378 /* If first insn of libcall sequence, skip to end. */
1379 /* Do this at start of loop, since INSN is guaranteed to
1382 && (temp = find_reg_note (insn, REG_LIBCALL, NULL_RTX)))
1383 insn = XEXP (temp, 0);
1386 insn = NEXT_INSN (insn);
1387 while (NOTE_P (insn));
1393 /* Ignore any movable whose insn falls within a libcall
1394 which is part of another movable.
1395 We make use of the fact that the movable for the libcall value
1396 was made later and so appears later on the chain. */
1399 ignore_some_movables (struct loop_movables *movables)
1401 struct movable *m, *m1;
1403 for (m = movables->head; m; m = m->next)
1405 /* Is this a movable for the value of a libcall? */
1406 rtx note = find_reg_note (m->insn, REG_RETVAL, NULL_RTX);
1410 /* Check for earlier movables inside that range,
1411 and mark them invalid. We cannot use LUIDs here because
1412 insns created by loop.c for prior loops don't have LUIDs.
1413 Rather than reject all such insns from movables, we just
1414 explicitly check each insn in the libcall (since invariant
1415 libcalls aren't that common). */
1416 for (insn = XEXP (note, 0); insn != m->insn; insn = NEXT_INSN (insn))
1417 for (m1 = movables->head; m1 != m; m1 = m1->next)
1418 if (m1->insn == insn)
1424 /* For each movable insn, see if the reg that it loads
1425 dies right where it feeds into another conditionally movable insn.
1426 If so, record that the second insn "forces" the first one,
1427 since the second can be moved only if the first is. */
1430 force_movables (struct loop_movables *movables)
1432 struct movable *m, *m1;
1434 for (m1 = movables->head; m1; m1 = m1->next)
1435 /* Omit this if moving just the (SET (REG) 0) of a zero-extend. */
1436 if (!m1->partial && !m1->done)
1438 int regno = m1->regno;
1439 for (m = m1->next; m; m = m->next)
1440 /* ??? Could this be a bug? What if CSE caused the
1441 register of M1 to be used after this insn?
1442 Since CSE does not update regno_last_uid,
1443 this insn M->insn might not be where it dies.
1444 But very likely this doesn't matter; what matters is
1445 that M's reg is computed from M1's reg. */
1446 if (INSN_UID (m->insn) == REGNO_LAST_UID (regno)
1449 if (m != 0 && m->set_src == m1->set_dest
1450 /* If m->consec, m->set_src isn't valid. */
1454 /* Increase the priority of moving the first insn
1455 since it permits the second to be moved as well.
1456 Likewise for insns already forced by the first insn. */
1462 for (m2 = m1; m2; m2 = m2->forces)
1464 m2->lifetime += m->lifetime;
1465 m2->savings += m->savings;
1471 /* Find invariant expressions that are equal and can be combined into
1475 combine_movables (struct loop_movables *movables, struct loop_regs *regs)
1478 char *matched_regs = xmalloc (regs->num);
1479 enum machine_mode mode;
1481 /* Regs that are set more than once are not allowed to match
1482 or be matched. I'm no longer sure why not. */
1483 /* Only pseudo registers are allowed to match or be matched,
1484 since move_movables does not validate the change. */
1485 /* Perhaps testing m->consec_sets would be more appropriate here? */
1487 for (m = movables->head; m; m = m->next)
1488 if (m->match == 0 && regs->array[m->regno].n_times_set == 1
1489 && m->regno >= FIRST_PSEUDO_REGISTER
1494 int regno = m->regno;
1496 memset (matched_regs, 0, regs->num);
1497 matched_regs[regno] = 1;
1499 /* We want later insns to match the first one. Don't make the first
1500 one match any later ones. So start this loop at m->next. */
1501 for (m1 = m->next; m1; m1 = m1->next)
1502 if (m != m1 && m1->match == 0
1504 && regs->array[m1->regno].n_times_set == 1
1505 && m1->regno >= FIRST_PSEUDO_REGISTER
1506 /* A reg used outside the loop mustn't be eliminated. */
1508 /* A reg used for zero-extending mustn't be eliminated. */
1510 && (matched_regs[m1->regno]
1513 /* Can combine regs with different modes loaded from the
1514 same constant only if the modes are the same or
1515 if both are integer modes with M wider or the same
1516 width as M1. The check for integer is redundant, but
1517 safe, since the only case of differing destination
1518 modes with equal sources is when both sources are
1519 VOIDmode, i.e., CONST_INT. */
1520 (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest)
1521 || (GET_MODE_CLASS (GET_MODE (m->set_dest)) == MODE_INT
1522 && GET_MODE_CLASS (GET_MODE (m1->set_dest)) == MODE_INT
1523 && (GET_MODE_BITSIZE (GET_MODE (m->set_dest))
1524 >= GET_MODE_BITSIZE (GET_MODE (m1->set_dest)))))
1525 /* See if the source of M1 says it matches M. */
1526 && ((REG_P (m1->set_src)
1527 && matched_regs[REGNO (m1->set_src)])
1528 || rtx_equal_for_loop_p (m->set_src, m1->set_src,
1530 && ((m->dependencies == m1->dependencies)
1531 || rtx_equal_p (m->dependencies, m1->dependencies)))
1533 m->lifetime += m1->lifetime;
1534 m->savings += m1->savings;
1537 matched_regs[m1->regno] = 1;
1541 /* Now combine the regs used for zero-extension.
1542 This can be done for those not marked `global'
1543 provided their lives don't overlap. */
1545 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1546 mode = GET_MODE_WIDER_MODE (mode))
1548 struct movable *m0 = 0;
1550 /* Combine all the registers for extension from mode MODE.
1551 Don't combine any that are used outside this loop. */
1552 for (m = movables->head; m; m = m->next)
1553 if (m->partial && ! m->global
1554 && mode == GET_MODE (SET_SRC (PATTERN (NEXT_INSN (m->insn)))))
1558 int first = REGNO_FIRST_LUID (m->regno);
1559 int last = REGNO_LAST_LUID (m->regno);
1563 /* First one: don't check for overlap, just record it. */
1568 /* Make sure they extend to the same mode.
1569 (Almost always true.) */
1570 if (GET_MODE (m->set_dest) != GET_MODE (m0->set_dest))
1573 /* We already have one: check for overlap with those
1574 already combined together. */
1575 for (m1 = movables->head; m1 != m; m1 = m1->next)
1576 if (m1 == m0 || (m1->partial && m1->match == m0))
1577 if (! (REGNO_FIRST_LUID (m1->regno) > last
1578 || REGNO_LAST_LUID (m1->regno) < first))
1581 /* No overlap: we can combine this with the others. */
1582 m0->lifetime += m->lifetime;
1583 m0->savings += m->savings;
1593 free (matched_regs);
1596 /* Returns the number of movable instructions in LOOP that were not
1597 moved outside the loop. */
1600 num_unmoved_movables (const struct loop *loop)
1605 for (m = LOOP_MOVABLES (loop)->head; m; m = m->next)
1613 /* Return 1 if regs X and Y will become the same if moved. */
1616 regs_match_p (rtx x, rtx y, struct loop_movables *movables)
1618 unsigned int xn = REGNO (x);
1619 unsigned int yn = REGNO (y);
1620 struct movable *mx, *my;
1622 for (mx = movables->head; mx; mx = mx->next)
1623 if (mx->regno == xn)
1626 for (my = movables->head; my; my = my->next)
1627 if (my->regno == yn)
1631 && ((mx->match == my->match && mx->match != 0)
1633 || mx == my->match));
1636 /* Return 1 if X and Y are identical-looking rtx's.
1637 This is the Lisp function EQUAL for rtx arguments.
1639 If two registers are matching movables or a movable register and an
1640 equivalent constant, consider them equal. */
1643 rtx_equal_for_loop_p (rtx x, rtx y, struct loop_movables *movables,
1644 struct loop_regs *regs)
1654 if (x == 0 || y == 0)
1657 code = GET_CODE (x);
1659 /* If we have a register and a constant, they may sometimes be
1661 if (REG_P (x) && regs->array[REGNO (x)].set_in_loop == -2
1664 for (m = movables->head; m; m = m->next)
1665 if (m->move_insn && m->regno == REGNO (x)
1666 && rtx_equal_p (m->set_src, y))
1669 else if (REG_P (y) && regs->array[REGNO (y)].set_in_loop == -2
1672 for (m = movables->head; m; m = m->next)
1673 if (m->move_insn && m->regno == REGNO (y)
1674 && rtx_equal_p (m->set_src, x))
1678 /* Otherwise, rtx's of different codes cannot be equal. */
1679 if (code != GET_CODE (y))
1682 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.
1683 (REG:SI x) and (REG:HI x) are NOT equivalent. */
1685 if (GET_MODE (x) != GET_MODE (y))
1688 /* These three types of rtx's can be compared nonrecursively. */
1690 return (REGNO (x) == REGNO (y) || regs_match_p (x, y, movables));
1692 if (code == LABEL_REF)
1693 return XEXP (x, 0) == XEXP (y, 0);
1694 if (code == SYMBOL_REF)
1695 return XSTR (x, 0) == XSTR (y, 0);
1697 /* Compare the elements. If any pair of corresponding elements
1698 fails to match, return 0 for the whole thing.  */
1700 fmt = GET_RTX_FORMAT (code);
1701 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1706 if (XWINT (x, i) != XWINT (y, i))
1711 if (XINT (x, i) != XINT (y, i))
1716 /* Two vectors must have the same length. */
1717 if (XVECLEN (x, i) != XVECLEN (y, i))
1720 /* And the corresponding elements must match. */
1721 for (j = 0; j < XVECLEN (x, i); j++)
1722 if (rtx_equal_for_loop_p (XVECEXP (x, i, j), XVECEXP (y, i, j),
1723 movables, regs) == 0)
1728 if (rtx_equal_for_loop_p (XEXP (x, i), XEXP (y, i), movables, regs)
1734 if (strcmp (XSTR (x, i), XSTR (y, i)))
1739 /* These are just backpointers, so they don't matter. */
1745 /* It is believed that rtx's at this level will never
1746 contain anything but integers and other rtx's,
1747 except for within LABEL_REFs and SYMBOL_REFs. */
1755 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to all
1756 insns in INSNS which use the reference. LABEL_NUSES for CODE_LABEL
1757 references is incremented once for each added note. */
1760 add_label_notes (rtx x, rtx insns)
1762 enum rtx_code code = GET_CODE (x);
1767 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
1769 /* This code used to ignore labels that referred to dispatch tables to
1770 avoid flow generating (slightly) worse code.
1772 We no longer ignore such label references (see LABEL_REF handling in
1773 mark_jump_label for additional information). */
1774 for (insn = insns; insn; insn = NEXT_INSN (insn))
1775 if (reg_mentioned_p (XEXP (x, 0), insn))
1777 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, XEXP (x, 0),
1779 if (LABEL_P (XEXP (x, 0)))
1780 LABEL_NUSES (XEXP (x, 0))++;
1784 fmt = GET_RTX_FORMAT (code);
1785 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1788 add_label_notes (XEXP (x, i), insns);
1789 else if (fmt[i] == 'E')
1790 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
1791 add_label_notes (XVECEXP (x, i, j), insns);
1795 /* Scan MOVABLES, and move the insns that deserve to be moved.
1796 If two matching movables are combined, replace one reg with the
1797 other throughout. */
1800 move_movables (struct loop *loop, struct loop_movables *movables,
1801 int threshold, int insn_count)
1803 struct loop_regs *regs = LOOP_REGS (loop);
1804 int nregs = regs->num;
1808 rtx loop_start = loop->start;
1809 rtx loop_end = loop->end;
1810 /* Map of pseudo-register replacements to handle combining
1811 when we move several insns that load the same value
1812 into different pseudo-registers. */
1813 rtx *reg_map = xcalloc (nregs, sizeof (rtx));
1814 char *already_moved = xcalloc (nregs, sizeof (char));
1816 for (m = movables->head; m; m = m->next)
1818 /* Describe this movable insn. */
1820 if (loop_dump_stream)
1822 fprintf (loop_dump_stream, "Insn %d: regno %d (life %d), ",
1823 INSN_UID (m->insn), m->regno, m->lifetime);
1825 fprintf (loop_dump_stream, "consec %d, ", m->consec);
1827 fprintf (loop_dump_stream, "cond ");
1829 fprintf (loop_dump_stream, "force ");
1831 fprintf (loop_dump_stream, "global ");
1833 fprintf (loop_dump_stream, "done ");
1835 fprintf (loop_dump_stream, "move-insn ");
1837 fprintf (loop_dump_stream, "matches %d ",
1838 INSN_UID (m->match->insn));
1840 fprintf (loop_dump_stream, "forces %d ",
1841 INSN_UID (m->forces->insn));
1844 /* Ignore the insn if it's already done (it matched something else).
1845 Otherwise, see if it is now safe to move. */
1849 || (1 == loop_invariant_p (loop, m->set_src)
1850 && (m->dependencies == 0
1851 || 1 == loop_invariant_p (loop, m->dependencies))
1853 || 1 == consec_sets_invariant_p (loop, m->set_dest,
1856 && (! m->forces || m->forces->done))
1860 int savings = m->savings;
1862 /* We have an insn that is safe to move.
1863 Compute its desirability. */
1868 if (loop_dump_stream)
1869 fprintf (loop_dump_stream, "savings %d ", savings);
1871 if (regs->array[regno].moved_once && loop_dump_stream)
1872 fprintf (loop_dump_stream, "halved since already moved ");
1874 /* An insn MUST be moved if we already moved something else
1875 which is safe only if this one is moved too: that is,
1876 if already_moved[REGNO] is nonzero. */
1878 /* An insn is desirable to move if the new lifetime of the
1879 register is no more than THRESHOLD times the old lifetime.
1880 If it's not desirable, it means the loop is so big
1881 that moving won't speed things up much,
1882 and it is liable to make register usage worse. */
1884 /* It is also desirable to move if it can be moved at no
1885 extra cost because something else was already moved. */
1887 if (already_moved[regno]
1888 || (threshold * savings * m->lifetime) >=
1889 (regs->array[regno].moved_once ? insn_count * 2 : insn_count)
1890 || (m->forces && m->forces->done
1891 && regs->array[m->forces->regno].n_times_set == 1))
1895 rtx first = NULL_RTX;
1896 rtx newreg = NULL_RTX;
1899 newreg = gen_reg_rtx (GET_MODE (m->set_dest));
1901 /* Now move the insns that set the reg. */
1903 if (m->partial && m->match)
1907 /* Find the end of this chain of matching regs.
1908 Thus, we load each reg in the chain from that one reg.
1909 And that reg is loaded with 0 directly,
1910 since it has ->match == 0. */
1911 for (m1 = m; m1->match; m1 = m1->match);
1912 newpat = gen_move_insn (SET_DEST (PATTERN (m->insn)),
1913 SET_DEST (PATTERN (m1->insn)));
1914 i1 = loop_insn_hoist (loop, newpat);
1916 /* Mark the moved, invariant reg as being allowed to
1917 share a hard reg with the other matching invariant. */
1918 REG_NOTES (i1) = REG_NOTES (m->insn);
1919 r1 = SET_DEST (PATTERN (m->insn));
1920 r2 = SET_DEST (PATTERN (m1->insn));
1922 = gen_rtx_EXPR_LIST (VOIDmode, r1,
1923 gen_rtx_EXPR_LIST (VOIDmode, r2,
1925 delete_insn (m->insn);
1930 if (loop_dump_stream)
1931 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
1933 /* If we are to re-generate the item being moved with a
1934 new move insn, first delete what we have and then emit
1935 the move insn before the loop. */
1936 else if (m->move_insn)
1940 for (count = m->consec; count >= 0; count--)
1942 /* If this is the first insn of a library call sequence,
1943 something is very wrong. */
1945 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
1948 /* If this is the last insn of a libcall sequence, then
1949 delete every insn in the sequence except the last.
1950 The last insn is handled in the normal manner. */
1952 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
1954 temp = XEXP (temp, 0);
1956 temp = delete_insn (temp);
1960 p = delete_insn (p);
1962 /* simplify_giv_expr expects that it can walk the insns
1963 at m->insn forwards and see this old sequence we are
1964 tossing here. delete_insn does preserve the next
1965 pointers, but when we skip over a NOTE we must fix
1966 it up. Otherwise that code walks into the non-deleted insns. */
1968 while (p && NOTE_P (p))
1969 p = NEXT_INSN (temp) = NEXT_INSN (p);
1973 /* Replace the original insn with a move from
1974 our newly created temp. */
1976 emit_move_insn (m->set_dest, newreg);
1979 emit_insn_before (seq, p);
1984 emit_move_insn (m->insert_temp ? newreg : m->set_dest,
1989 add_label_notes (m->set_src, seq);
1991 i1 = loop_insn_hoist (loop, seq);
1992 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
1993 set_unique_reg_note (i1,
1994 m->is_equiv ? REG_EQUIV : REG_EQUAL,
1997 if (loop_dump_stream)
1998 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
2000 /* The more regs we move, the less we like moving them. */
2005 for (count = m->consec; count >= 0; count--)
2009 /* If first insn of libcall sequence, skip to end. */
2010 /* Do this at start of loop, since p is guaranteed to be an insn here. */
2013 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
2016 /* If last insn of libcall sequence, move all
2017 insns except the last before the loop. The last
2018 insn is handled in the normal manner. */
2020 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
2024 rtx fn_address_insn = 0;
2027 for (temp = XEXP (temp, 0); temp != p;
2028 temp = NEXT_INSN (temp))
2037 body = PATTERN (temp);
2039 /* Find the next insn after TEMP,
2040 not counting USE or NOTE insns. */
2041 for (next = NEXT_INSN (temp); next != p;
2042 next = NEXT_INSN (next))
2043 if (! (NONJUMP_INSN_P (next)
2044 && GET_CODE (PATTERN (next)) == USE)
2048 /* If that is the call, this may be the insn
2049 that loads the function address.
2051 Extract the function address from the insn
2052 that loads it into a register.
2053 If this insn was cse'd, we get incorrect code.
2055 So emit a new move insn that copies the
2056 function address into the register that the
2057 call insn will use. flow.c will delete any
2058 redundant stores that we have created. */
2060 && GET_CODE (body) == SET
2061 && REG_P (SET_DEST (body))
2062 && (n = find_reg_note (temp, REG_EQUAL,
2065 fn_reg = SET_SRC (body);
2066 if (!REG_P (fn_reg))
2067 fn_reg = SET_DEST (body);
2068 fn_address = XEXP (n, 0);
2069 fn_address_insn = temp;
2071 /* We have the call insn.
2072 If it uses the register we suspect it might,
2073 load it with the correct address directly. */
2076 && reg_referenced_p (fn_reg, body))
2077 loop_insn_emit_after (loop, 0, fn_address_insn,
2079 (fn_reg, fn_address));
2083 i1 = loop_call_insn_hoist (loop, body);
2084 /* Because the USAGE information potentially
2085 contains objects other than hard registers
2086 we need to copy it. */
2087 if (CALL_INSN_FUNCTION_USAGE (temp))
2088 CALL_INSN_FUNCTION_USAGE (i1)
2089 = copy_rtx (CALL_INSN_FUNCTION_USAGE (temp));
2092 i1 = loop_insn_hoist (loop, body);
2095 if (temp == fn_address_insn)
2096 fn_address_insn = i1;
2097 REG_NOTES (i1) = REG_NOTES (temp);
2098 REG_NOTES (temp) = NULL;
2104 if (m->savemode != VOIDmode)
2106 /* P sets REG to zero; but we should clear only
2107 the bits that are not covered by the mode m->savemode. */
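/* Illustration: if m->savemode were QImode, the AND mask computed below
   is (1 << 8) - 1 == 0xff, so the low byte copied inside the loop is
   kept while the remaining bits of REG are cleared once before the
   loop.  (QImode is just an example; any mode narrower than REG works
   the same way.)  */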
2109 rtx reg = m->set_dest;
2114 tem = expand_simple_binop
2115 (GET_MODE (reg), AND, reg,
2116 GEN_INT ((((HOST_WIDE_INT) 1
2117 << GET_MODE_BITSIZE (m->savemode)))
2119 reg, 1, OPTAB_LIB_WIDEN);
2123 emit_move_insn (reg, tem);
2124 sequence = get_insns ();
2126 i1 = loop_insn_hoist (loop, sequence);
2128 else if (CALL_P (p))
2130 i1 = loop_call_insn_hoist (loop, PATTERN (p));
2131 /* Because the USAGE information potentially
2132 contains objects other than hard registers
2133 we need to copy it. */
2134 if (CALL_INSN_FUNCTION_USAGE (p))
2135 CALL_INSN_FUNCTION_USAGE (i1)
2136 = copy_rtx (CALL_INSN_FUNCTION_USAGE (p));
2138 else if (count == m->consec && m->move_insn_first)
2141 /* The SET_SRC might not be invariant, so we must
2142 use the REG_EQUAL note. */
2144 emit_move_insn (m->insert_temp ? newreg : m->set_dest,
2149 add_label_notes (m->set_src, seq);
2151 i1 = loop_insn_hoist (loop, seq);
2152 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
2153 set_unique_reg_note (i1, m->is_equiv ? REG_EQUIV
2154 : REG_EQUAL, m->set_src);
2156 else if (m->insert_temp)
2158 rtx *reg_map2 = xcalloc (REGNO (newreg),
2160 reg_map2 [m->regno] = newreg;
2162 i1 = loop_insn_hoist (loop, copy_rtx (PATTERN (p)));
2163 replace_regs (i1, reg_map2, REGNO (newreg), 1);
2167 i1 = loop_insn_hoist (loop, PATTERN (p));
2169 if (REG_NOTES (i1) == 0)
2171 REG_NOTES (i1) = REG_NOTES (p);
2172 REG_NOTES (p) = NULL;
2174 /* If there is a REG_EQUAL note present whose value
2175 is not loop invariant, then delete it, since it
2176 may cause problems with later optimization passes.
2177 It is possible for cse to create such notes
2178 like this as a result of record_jump_cond. */
2180 if ((temp = find_reg_note (i1, REG_EQUAL, NULL_RTX))
2181 && ! loop_invariant_p (loop, XEXP (temp, 0)))
2182 remove_note (i1, temp);
2188 if (loop_dump_stream)
2189 fprintf (loop_dump_stream, " moved to %d",
2192 /* If library call, now fix the REG_NOTES that contain
2193 insn pointers, namely REG_LIBCALL on FIRST
2194 and REG_RETVAL on I1. */
2195 if ((temp = find_reg_note (i1, REG_RETVAL, NULL_RTX)))
2197 XEXP (temp, 0) = first;
2198 temp = find_reg_note (first, REG_LIBCALL, NULL_RTX);
2199 XEXP (temp, 0) = i1;
2206 /* simplify_giv_expr expects that it can walk the insns
2207 at m->insn forwards and see this old sequence we are
2208 tossing here. delete_insn does preserve the next
2209 pointers, but when we skip over a NOTE we must fix
2210 it up. Otherwise that code walks into the non-deleted insns. */
2212 while (p && NOTE_P (p))
2213 p = NEXT_INSN (temp) = NEXT_INSN (p);
2218 /* Replace the original insn with a move from
2219 our newly created temp. */
2221 emit_move_insn (m->set_dest, newreg);
2224 emit_insn_before (seq, p);
2228 /* The more regs we move, the less we like moving them. */
2234 if (!m->insert_temp)
2236 /* Any other movable that loads the same register MUST be moved. */
2238 already_moved[regno] = 1;
2240 /* This reg has been moved out of one loop. */
2241 regs->array[regno].moved_once = 1;
2243 /* The reg set here is now invariant. */
2247 for (i = 0; i < LOOP_REGNO_NREGS (regno, m->set_dest); i++)
2248 regs->array[regno+i].set_in_loop = 0;
2251 /* Change the length-of-life info for the register
2252 to say it lives at least the full length of this loop.
2253 This will help guide optimizations in outer loops. */
2255 if (REGNO_FIRST_LUID (regno) > INSN_LUID (loop_start))
2256 /* This is the old insn before all the moved insns.
2257 We can't use the moved insn because it is out of range
2258 in uid_luid. Only the old insns have luids. */
2259 REGNO_FIRST_UID (regno) = INSN_UID (loop_start);
2260 if (REGNO_LAST_LUID (regno) < INSN_LUID (loop_end))
2261 REGNO_LAST_UID (regno) = INSN_UID (loop_end);
2264 /* Combine with this moved insn any other matching movables. */
2267 for (m1 = movables->head; m1; m1 = m1->next)
2272 /* Schedule the reg loaded by M1
2273 for replacement so that it shares the reg of M.
2274 If the modes differ (only possible in restricted
2275 circumstances), make a SUBREG.
2277 Note this assumes that the target dependent files
2278 treat REG and SUBREG equally, including within
2279 GO_IF_LEGITIMATE_ADDRESS and in all the
2280 predicates since we never verify that replacing the
2281 original register with a SUBREG results in a
2282 recognizable insn. */
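/* Hedged example: if M sets a DImode pseudo while M1 only needs the
   SImode value, M1's register is scheduled to be replaced by a low-part
   SUBREG of M's register (built with gen_lowpart_common below); the
   modes are illustrative only.  */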
2283 if (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest))
2284 reg_map[m1->regno] = m->set_dest;
2287 = gen_lowpart_common (GET_MODE (m1->set_dest),
2290 /* Get rid of the matching insn
2291 and prevent further processing of it. */
2294 /* If library call, delete all insns. */
2295 if ((temp = find_reg_note (m1->insn, REG_RETVAL,
2297 delete_insn_chain (XEXP (temp, 0), m1->insn);
2299 delete_insn (m1->insn);
2301 /* Any other movable that loads the same register MUST be moved. */
2303 already_moved[m1->regno] = 1;
2305 /* The reg merged here is now invariant,
2306 if the reg it matches is invariant. */
2311 i < LOOP_REGNO_NREGS (regno, m1->set_dest);
2313 regs->array[m1->regno+i].set_in_loop = 0;
2317 else if (loop_dump_stream)
2318 fprintf (loop_dump_stream, "not desirable");
2320 else if (loop_dump_stream && !m->match)
2321 fprintf (loop_dump_stream, "not safe");
2323 if (loop_dump_stream)
2324 fprintf (loop_dump_stream, "\n");
2328 new_start = loop_start;
2330 /* Go through all the instructions in the loop, making
2331 all the register substitutions scheduled in REG_MAP. */
2332 for (p = new_start; p != loop_end; p = NEXT_INSN (p))
2335 replace_regs (PATTERN (p), reg_map, nregs, 0);
2336 replace_regs (REG_NOTES (p), reg_map, nregs, 0);
2342 free (already_moved);
2347 loop_movables_add (struct loop_movables *movables, struct movable *m)
2349 if (movables->head == 0)
2352 movables->last->next = m;
2358 loop_movables_free (struct loop_movables *movables)
2361 struct movable *m_next;
2363 for (m = movables->head; m; m = m_next)
2371 /* Scan X and replace the address of any MEM in it with ADDR.
2372 REG is the address that MEM should have before the replacement. */
2375 replace_call_address (rtx x, rtx reg, rtx addr)
2383 code = GET_CODE (x);
2397 /* Short cut for very common case. */
2398 replace_call_address (XEXP (x, 1), reg, addr);
2402 /* Short cut for very common case. */
2403 replace_call_address (XEXP (x, 0), reg, addr);
2407 /* If this MEM uses a reg other than the one we expected,
2408 something is wrong. */
2409 if (XEXP (x, 0) != reg)
2418 fmt = GET_RTX_FORMAT (code);
2419 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2422 replace_call_address (XEXP (x, i), reg, addr);
2423 else if (fmt[i] == 'E')
2426 for (j = 0; j < XVECLEN (x, i); j++)
2427 replace_call_address (XVECEXP (x, i, j), reg, addr);
2433 /* Return the number of memory refs to addresses that vary within the loop. */
2437 count_nonfixed_reads (const struct loop *loop, rtx x)
2447 code = GET_CODE (x);
2461 return ((loop_invariant_p (loop, XEXP (x, 0)) != 1)
2462 + count_nonfixed_reads (loop, XEXP (x, 0)));
2469 fmt = GET_RTX_FORMAT (code);
2470 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2473 value += count_nonfixed_reads (loop, XEXP (x, i));
2477 for (j = 0; j < XVECLEN (x, i); j++)
2478 value += count_nonfixed_reads (loop, XVECEXP (x, i, j));
2484 /* Scan a loop setting the elements `loops_enclosed',
2485 `has_call', `has_nonconst_call', `has_volatile', `has_tablejump',
2486 `unknown_address_altered', `unknown_constant_address_altered', and
2487 `num_mem_sets' in LOOP. Also, fill in the array `mems' and the
2488 list `store_mems' in LOOP. */
2491 prescan_loop (struct loop *loop)
2495 struct loop_info *loop_info = LOOP_INFO (loop);
2496 rtx start = loop->start;
2497 rtx end = loop->end;
2498 /* The label after END. Jumping here is just like falling off the
2499 end of the loop. We use next_nonnote_insn instead of next_label
2500 as a hedge against the (pathological) case where some actual insn
2501 might end up between the two. */
2502 rtx exit_target = next_nonnote_insn (end);
2504 loop_info->has_indirect_jump = indirect_jump_in_function;
2505 loop_info->pre_header_has_call = 0;
2506 loop_info->has_call = 0;
2507 loop_info->has_nonconst_call = 0;
2508 loop_info->has_prefetch = 0;
2509 loop_info->has_volatile = 0;
2510 loop_info->has_tablejump = 0;
2511 loop_info->has_multiple_exit_targets = 0;
2514 loop_info->unknown_address_altered = 0;
2515 loop_info->unknown_constant_address_altered = 0;
2516 loop_info->store_mems = NULL_RTX;
2517 loop_info->first_loop_store_insn = NULL_RTX;
2518 loop_info->mems_idx = 0;
2519 loop_info->num_mem_sets = 0;
2520 /* If loop opts run twice, this was set on 1st pass for 2nd. */
2521 loop_info->preconditioned = NOTE_PRECONDITIONED (end);
2523 for (insn = start; insn && !LABEL_P (insn);
2524 insn = PREV_INSN (insn))
2528 loop_info->pre_header_has_call = 1;
2533 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
2534 insn = NEXT_INSN (insn))
2536 switch (GET_CODE (insn))
2539 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
2542 /* Count number of loops contained in this one. */
2545 else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END)
2550 if (! CONST_OR_PURE_CALL_P (insn))
2552 loop_info->unknown_address_altered = 1;
2553 loop_info->has_nonconst_call = 1;
2555 else if (pure_call_p (insn))
2556 loop_info->has_nonconst_call = 1;
2557 loop_info->has_call = 1;
2558 if (can_throw_internal (insn))
2559 loop_info->has_multiple_exit_targets = 1;
2563 if (! loop_info->has_multiple_exit_targets)
2565 rtx set = pc_set (insn);
2569 rtx src = SET_SRC (set);
2572 if (GET_CODE (src) == IF_THEN_ELSE)
2574 label1 = XEXP (src, 1);
2575 label2 = XEXP (src, 2);
2585 if (label1 && label1 != pc_rtx)
2587 if (GET_CODE (label1) != LABEL_REF)
2589 /* Something tricky. */
2590 loop_info->has_multiple_exit_targets = 1;
2593 else if (XEXP (label1, 0) != exit_target
2594 && LABEL_OUTSIDE_LOOP_P (label1))
2596 /* A jump outside the current loop. */
2597 loop_info->has_multiple_exit_targets = 1;
2609 /* A return, or something tricky. */
2610 loop_info->has_multiple_exit_targets = 1;
2616 if (volatile_refs_p (PATTERN (insn)))
2617 loop_info->has_volatile = 1;
2620 && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
2621 || GET_CODE (PATTERN (insn)) == ADDR_VEC))
2622 loop_info->has_tablejump = 1;
2624 note_stores (PATTERN (insn), note_addr_stored, loop_info);
2625 if (! loop_info->first_loop_store_insn && loop_info->store_mems)
2626 loop_info->first_loop_store_insn = insn;
2628 if (flag_non_call_exceptions && can_throw_internal (insn))
2629 loop_info->has_multiple_exit_targets = 1;
2637 /* Now, rescan the loop, setting up the LOOP_MEMS array. */
2638 if (/* An exception thrown by a called function might land us anywhere. */
2640 ! loop_info->has_nonconst_call
2641 /* We don't want loads for MEMs moved to a location before the
2642 one at which their stack memory becomes allocated. (Note
2643 that this is not a problem for malloc, etc., since those
2644 require actual function calls.) */
2645 && ! current_function_calls_alloca
2646 /* There are ways to leave the loop other than falling off the end. */
2648 && ! loop_info->has_multiple_exit_targets)
2649 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
2650 insn = NEXT_INSN (insn))
2651 for_each_rtx (&insn, insert_loop_mem, loop_info);
2653 /* BLKmode MEMs are added to LOOP_STORE_MEM as necessary so
2654 that loop_invariant_p and load_mems can use true_dependence
2655 to determine what is really clobbered. */
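/* The (mem:BLK (const_int 0)) entries added below act as wildcards:
   true_dependence treats a BLKmode MEM as potentially overlapping any
   address, so every load in the loop is then considered possibly
   clobbered; the MEM_READONLY_P copy plays the same role for stores to
   constant addresses.  */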
2656 if (loop_info->unknown_address_altered)
2658 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
2660 loop_info->store_mems
2661 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
2663 if (loop_info->unknown_constant_address_altered)
2665 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
2666 MEM_READONLY_P (mem) = 1;
2667 loop_info->store_mems
2668 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
2672 /* Invalidate all loops containing LABEL. */
2675 invalidate_loops_containing_label (rtx label)
2678 for (loop = uid_loop[INSN_UID (label)]; loop; loop = loop->outer)
2682 /* Scan the function looking for loops. Record the start and end of each loop.
2683 Also mark as invalid loops any loops that contain a setjmp or are branched
2684 to from outside the loop. */
2687 find_and_verify_loops (rtx f, struct loops *loops)
2692 struct loop *current_loop;
2693 struct loop *next_loop;
2696 num_loops = loops->num;
2698 compute_luids (f, NULL_RTX, 0);
2700 /* If there are jumps to undefined labels,
2701 treat them as jumps out of any/all loops.
2702 This also avoids writing past end of tables when there are no loops. */
2705 /* Find boundaries of loops, mark which loops are contained within
2706 loops, and invalidate loops that have setjmp. */
2709 current_loop = NULL;
2710 for (insn = f; insn; insn = NEXT_INSN (insn))
2713 switch (NOTE_LINE_NUMBER (insn))
2715 case NOTE_INSN_LOOP_BEG:
2716 next_loop = loops->array + num_loops;
2717 next_loop->num = num_loops;
2719 next_loop->start = insn;
2720 next_loop->outer = current_loop;
2721 current_loop = next_loop;
2724 case NOTE_INSN_LOOP_END:
2728 current_loop->end = insn;
2729 current_loop = current_loop->outer;
2737 && find_reg_note (insn, REG_SETJMP, NULL))
2739 /* In this case, we must invalidate our current loop and any enclosing loops. */
2741 for (loop = current_loop; loop; loop = loop->outer)
2744 if (loop_dump_stream)
2745 fprintf (loop_dump_stream,
2746 "\nLoop at %d ignored due to setjmp.\n",
2747 INSN_UID (loop->start));
2751 /* Note that this will mark the NOTE_INSN_LOOP_END note as being in the
2752 enclosing loop, but this doesn't matter. */
2753 uid_loop[INSN_UID (insn)] = current_loop;
2756 /* Any loop containing a label used in an initializer must be invalidated,
2757 because it can be jumped into from anywhere. */
2758 for (label = forced_labels; label; label = XEXP (label, 1))
2759 invalidate_loops_containing_label (XEXP (label, 0));
2761 /* Any loop containing a label used for an exception handler must be
2762 invalidated, because it can be jumped into from anywhere. */
2763 for_each_eh_label (invalidate_loops_containing_label);
2765 /* Now scan all insn's in the function. If any JUMP_INSN branches into a
2766 loop that it is not contained within, that loop is marked invalid.
2767 If any INSN or CALL_INSN uses a label's address, then the loop containing
2768 that label is marked invalid, because it could be jumped into from anywhere.
2771 Also look for blocks of code ending in an unconditional branch that
2772 exits the loop. If such a block is surrounded by a conditional
2773 branch around the block, move the block elsewhere (see below) and
2774 invert the jump to point to the code block. This may eliminate a
2775 label in our loop and will simplify processing by both us and a
2776 possible second cse pass. */
2778 for (insn = f; insn; insn = NEXT_INSN (insn))
2781 struct loop *this_loop = uid_loop[INSN_UID (insn)];
2783 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
2785 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
2787 invalidate_loops_containing_label (XEXP (note, 0));
2793 mark_loop_jump (PATTERN (insn), this_loop);
2795 /* See if this is an unconditional branch outside the loop. */
2797 && (GET_CODE (PATTERN (insn)) == RETURN
2798 || (any_uncondjump_p (insn)
2799 && onlyjump_p (insn)
2800 && (uid_loop[INSN_UID (JUMP_LABEL (insn))]
2802 && get_max_uid () < max_uid_for_loop)
2805 rtx our_next = next_real_insn (insn);
2806 rtx last_insn_to_move = NEXT_INSN (insn);
2807 struct loop *dest_loop;
2808 struct loop *outer_loop = NULL;
2810 /* Go backwards until we reach the start of the loop, a label, or a JUMP_INSN. */
2812 for (p = PREV_INSN (insn);
2815 && NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
2820 /* Check for the case where we have a jump to an inner nested
2821 loop, and do not perform the optimization in that case. */
2823 if (JUMP_LABEL (insn))
2825 dest_loop = uid_loop[INSN_UID (JUMP_LABEL (insn))];
2828 for (outer_loop = dest_loop; outer_loop;
2829 outer_loop = outer_loop->outer)
2830 if (outer_loop == this_loop)
2835 /* Make sure that the target of P is within the current loop. */
2837 if (JUMP_P (p) && JUMP_LABEL (p)
2838 && uid_loop[INSN_UID (JUMP_LABEL (p))] != this_loop)
2839 outer_loop = this_loop;
2841 /* If we stopped on a JUMP_INSN to the next insn after INSN,
2842 we have a block of code to try to move.
2844 We look backward and then forward from the target of INSN
2845 to find a BARRIER at the same loop depth as the target.
2846 If we find such a BARRIER, we make a new label for the start
2847 of the block, invert the jump in P and point it to that label,
2848 and move the block of code to the spot we found. */
2852 && JUMP_LABEL (p) != 0
2853 /* Just ignore jumps to labels that were never emitted.
2854 These always indicate compilation errors. */
2855 && INSN_UID (JUMP_LABEL (p)) != 0
2856 && any_condjump_p (p) && onlyjump_p (p)
2857 && next_real_insn (JUMP_LABEL (p)) == our_next
2858 /* If it's not safe to move the sequence, then we must not do it. */
2860 && insns_safe_to_move_p (p, NEXT_INSN (insn),
2861 &last_insn_to_move))
2864 = JUMP_LABEL (insn) ? JUMP_LABEL (insn) : get_last_insn ();
2865 struct loop *target_loop = uid_loop[INSN_UID (target)];
2869 /* Search for possible garbage past the conditional jumps
2870 and look for the last barrier. */
2871 for (tmp = last_insn_to_move;
2872 tmp && !LABEL_P (tmp); tmp = NEXT_INSN (tmp))
2873 if (BARRIER_P (tmp))
2874 last_insn_to_move = tmp;
2876 for (loc = target; loc; loc = PREV_INSN (loc))
2878 /* Don't move things inside a tablejump. */
2879 && ((loc2 = next_nonnote_insn (loc)) == 0
2881 || (loc2 = next_nonnote_insn (loc2)) == 0
2883 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
2884 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
2885 && uid_loop[INSN_UID (loc)] == target_loop)
2889 for (loc = target; loc; loc = NEXT_INSN (loc))
2891 /* Don't move things inside a tablejump. */
2892 && ((loc2 = next_nonnote_insn (loc)) == 0
2894 || (loc2 = next_nonnote_insn (loc2)) == 0
2896 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
2897 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
2898 && uid_loop[INSN_UID (loc)] == target_loop)
2903 rtx cond_label = JUMP_LABEL (p);
2904 rtx new_label = get_label_after (p);
2906 /* Ensure our label doesn't go away. */
2907 LABEL_NUSES (cond_label)++;
2909 /* Verify that uid_loop is large enough and that we can invert P. */
2911 if (invert_jump (p, new_label, 1))
2915 /* If no suitable BARRIER was found, create a suitable
2916 one before TARGET. Since TARGET is a fall through
2917 path, we'll need to insert a jump around our block
2918 and add a BARRIER before TARGET.
2920 This creates an extra unconditional jump outside
2921 the loop. However, the benefits of removing rarely
2922 executed instructions from inside the loop usually
2923 outweigh the cost of the extra unconditional jump
2924 outside the loop. */
2929 temp = gen_jump (JUMP_LABEL (insn));
2930 temp = emit_jump_insn_before (temp, target);
2931 JUMP_LABEL (temp) = JUMP_LABEL (insn);
2932 LABEL_NUSES (JUMP_LABEL (insn))++;
2933 loc = emit_barrier_before (target);
2936 /* Include the BARRIER after INSN and copy the whole block after LOC. */
2938 if (squeeze_notes (&new_label, &last_insn_to_move))
2940 reorder_insns (new_label, last_insn_to_move, loc);
2942 /* All those insns are now in TARGET_LOOP. */
2944 q != NEXT_INSN (last_insn_to_move);
2946 uid_loop[INSN_UID (q)] = target_loop;
2948 /* The label jumped to by INSN is no longer a loop
2949 exit. Unless INSN does not have a label (e.g.,
2950 it is a RETURN insn), search loop->exit_labels
2951 to find its label_ref, and remove it. Also turn
2952 off LABEL_OUTSIDE_LOOP_P bit. */
2953 if (JUMP_LABEL (insn))
2955 for (q = 0, r = this_loop->exit_labels;
2957 q = r, r = LABEL_NEXTREF (r))
2958 if (XEXP (r, 0) == JUMP_LABEL (insn))
2960 LABEL_OUTSIDE_LOOP_P (r) = 0;
2962 LABEL_NEXTREF (q) = LABEL_NEXTREF (r);
2964 this_loop->exit_labels = LABEL_NEXTREF (r);
2968 for (loop = this_loop; loop && loop != target_loop;
2972 /* If we didn't find it, then something is wrong. */
2978 /* P is now a jump outside the loop, so it must be put
2979 in loop->exit_labels, and marked as such.
2980 The easiest way to do this is to just call
2981 mark_loop_jump again for P. */
2982 mark_loop_jump (PATTERN (p), this_loop);
2984 /* If INSN now jumps to the insn after it, delete INSN. */
2986 if (JUMP_LABEL (insn) != 0
2987 && (next_real_insn (JUMP_LABEL (insn))
2988 == next_real_insn (insn)))
2989 delete_related_insns (insn);
2992 /* Continue the loop after where the conditional
2993 branch used to jump, since the only branch insn
2994 in the block (if it still remains) is an inter-loop
2995 branch and hence needs no processing. */
2996 insn = NEXT_INSN (cond_label);
2998 if (--LABEL_NUSES (cond_label) == 0)
2999 delete_related_insns (cond_label);
3001 /* This loop will be continued with NEXT_INSN (insn). */
3002 insn = PREV_INSN (insn);
3009 /* If any label in X jumps to a loop different from LOOP_NUM and any of the
3010 loops it is contained in, mark the target loop invalid.
3012 For speed, we assume that X is part of a pattern of a JUMP_INSN. */
3015 mark_loop_jump (rtx x, struct loop *loop)
3017 struct loop *dest_loop;
3018 struct loop *outer_loop;
3021 switch (GET_CODE (x))
3034 /* There could be a label reference in here. */
3035 mark_loop_jump (XEXP (x, 0), loop);
3041 mark_loop_jump (XEXP (x, 0), loop);
3042 mark_loop_jump (XEXP (x, 1), loop);
3046 /* This may refer to a LABEL_REF or SYMBOL_REF. */
3047 mark_loop_jump (XEXP (x, 1), loop);
3052 mark_loop_jump (XEXP (x, 0), loop);
3056 dest_loop = uid_loop[INSN_UID (XEXP (x, 0))];
3058 /* Link together all labels that branch outside the loop. This
3059 is used by final_[bg]iv_value and the loop unrolling code. Also
3060 mark this LABEL_REF so we know that this branch should predict false. */
3063 /* A check to make sure the label is not in an inner nested loop,
3064 since this does not count as a loop exit. */
3067 for (outer_loop = dest_loop; outer_loop;
3068 outer_loop = outer_loop->outer)
3069 if (outer_loop == loop)
3075 if (loop && ! outer_loop)
3077 LABEL_OUTSIDE_LOOP_P (x) = 1;
3078 LABEL_NEXTREF (x) = loop->exit_labels;
3079 loop->exit_labels = x;
3081 for (outer_loop = loop;
3082 outer_loop && outer_loop != dest_loop;
3083 outer_loop = outer_loop->outer)
3084 outer_loop->exit_count++;
3087 /* If this is inside a loop, but not in the current loop or one enclosed
3088 by it, it invalidates at least one loop. */
3093 /* We must invalidate every nested loop containing the target of this
3094 label, except those that also contain the jump insn. */
3096 for (; dest_loop; dest_loop = dest_loop->outer)
3098 /* Stop when we reach a loop that also contains the jump insn. */
3099 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3100 if (dest_loop == outer_loop)
3103 /* If we get here, we know we need to invalidate a loop. */
3104 if (loop_dump_stream && ! dest_loop->invalid)
3105 fprintf (loop_dump_stream,
3106 "\nLoop at %d ignored due to multiple entry points.\n",
3107 INSN_UID (dest_loop->start));
3109 dest_loop->invalid = 1;
3114 /* If this is not setting pc, ignore. */
3115 if (SET_DEST (x) == pc_rtx)
3116 mark_loop_jump (SET_SRC (x), loop);
3120 mark_loop_jump (XEXP (x, 1), loop);
3121 mark_loop_jump (XEXP (x, 2), loop);
3126 for (i = 0; i < XVECLEN (x, 0); i++)
3127 mark_loop_jump (XVECEXP (x, 0, i), loop);
3131 for (i = 0; i < XVECLEN (x, 1); i++)
3132 mark_loop_jump (XVECEXP (x, 1, i), loop);
3136 /* Strictly speaking this is not a jump into the loop, only a possible
3137 jump out of the loop. However, we have no way to link the destination
3138 of this jump onto the list of exit labels. To be safe we mark this
3139 loop and any containing loops as invalid. */
3142 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3144 if (loop_dump_stream && ! outer_loop->invalid)
3145 fprintf (loop_dump_stream,
3146 "\nLoop at %d ignored due to unknown exit jump.\n",
3147 INSN_UID (outer_loop->start));
3148 outer_loop->invalid = 1;
3155 /* Return nonzero if there is a label in the range from
3156 insn INSN to and including the insn whose luid is END.
3157 INSN must have an assigned luid (i.e., it must not have
3158 been previously created by loop.c). */
3161 labels_in_range_p (rtx insn, int end)
3163 while (insn && INSN_LUID (insn) <= end)
3167 insn = NEXT_INSN (insn);
3173 /* Record that a memory reference X is being set. */
3176 note_addr_stored (rtx x, rtx y ATTRIBUTE_UNUSED,
3177 void *data ATTRIBUTE_UNUSED)
3179 struct loop_info *loop_info = data;
3181 if (x == 0 || !MEM_P (x))
3184 /* Count number of memory writes.
3185 This affects heuristics in strength_reduce. */
3186 loop_info->num_mem_sets++;
3188 /* BLKmode MEM means all memory is clobbered. */
3189 if (GET_MODE (x) == BLKmode)
3191 if (MEM_READONLY_P (x))
3192 loop_info->unknown_constant_address_altered = 1;
3194 loop_info->unknown_address_altered = 1;
3199 loop_info->store_mems = gen_rtx_EXPR_LIST (VOIDmode, x,
3200 loop_info->store_mems);
3203 /* X is a value modified by an INSN that references a biv inside a loop
3204 exit test (i.e., X is somehow related to the value of the biv). If X
3205 is a pseudo that is used more than once, then the biv is (effectively)
3206 used more than once. DATA is a pointer to a loop_regs structure. */
3209 note_set_pseudo_multiple_uses (rtx x, rtx y ATTRIBUTE_UNUSED, void *data)
3211 struct loop_regs *regs = (struct loop_regs *) data;
3216 while (GET_CODE (x) == STRICT_LOW_PART
3217 || GET_CODE (x) == SIGN_EXTRACT
3218 || GET_CODE (x) == ZERO_EXTRACT
3219 || GET_CODE (x) == SUBREG)
3222 if (!REG_P (x) || REGNO (x) < FIRST_PSEUDO_REGISTER)
3225 /* If we do not have usage information, or if we know the register
3226 is used more than once, note that fact for check_dbra_loop. */
3227 if (REGNO (x) >= max_reg_before_loop
3228 || ! regs->array[REGNO (x)].single_usage
3229 || regs->array[REGNO (x)].single_usage == const0_rtx)
3230 regs->multiple_uses = 1;
3233 /* Return nonzero if the rtx X is invariant over the current loop.
3235 The value is 2 if we refer to something only conditionally invariant.
3237 A memory ref is invariant if it is not volatile and does not conflict
3238 with anything stored in `loop_info->store_mems'. */
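/* For instance (illustrative RTL), (plus (reg 100) (const_int 4)) gets a 1
   when pseudo 100 is never set inside the loop; a register whose
   set_in_loop count is negative (recorded as only conditionally
   invariant) gets a 2; a volatile MEM always gets a 0.  */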
3241 loop_invariant_p (const struct loop *loop, rtx x)
3243 struct loop_info *loop_info = LOOP_INFO (loop);
3244 struct loop_regs *regs = LOOP_REGS (loop);
3248 int conditional = 0;
3253 code = GET_CODE (x);
3263 /* A LABEL_REF is normally invariant, however, if we are unrolling
3264 loops, and this label is inside the loop, then it isn't invariant.
3265 This is because each unrolled copy of the loop body will have
3266 a copy of this label. If this was invariant, then an insn loading
3267 the address of this label into a register might get moved outside
3268 the loop, and then each loop body would end up using the same label.
3270 We don't know the loop bounds here though, so just fail for all labels. */
3272 if (flag_old_unroll_loops)
3279 case UNSPEC_VOLATILE:
3283 if ((x == frame_pointer_rtx || x == hard_frame_pointer_rtx
3284 || x == arg_pointer_rtx || x == pic_offset_table_rtx)
3285 && ! current_function_has_nonlocal_goto)
3288 if (LOOP_INFO (loop)->has_call
3289 && REGNO (x) < FIRST_PSEUDO_REGISTER && call_used_regs[REGNO (x)])
3292 /* Out-of-range regs can occur when we are called from unrolling.
3293 These registers created by the unroller are set in the loop,
3294 hence are never invariant.
3295 Other out-of-range regs can be generated by load_mems; those that
3296 are written to in the loop are not invariant, while those that are
3297 not written to are invariant. It would be easy for load_mems
3298 to set n_times_set correctly for these registers, however, there
3299 is no easy way to distinguish them from registers created by the unroller. */
3302 if (REGNO (x) >= (unsigned) regs->num)
3305 if (regs->array[REGNO (x)].set_in_loop < 0)
3308 return regs->array[REGNO (x)].set_in_loop == 0;
3311 /* Volatile memory references must be rejected. Do this before
3312 checking for read-only items, so that volatile read-only items
3313 will be rejected also. */
3314 if (MEM_VOLATILE_P (x))
3317 /* See if there is any dependence between a store and this load. */
3318 mem_list_entry = loop_info->store_mems;
3319 while (mem_list_entry)
3321 if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
3325 mem_list_entry = XEXP (mem_list_entry, 1);
3328 /* It's not invalidated by a store in memory
3329 but we must still verify the address is invariant. */
3333 /* Don't mess with insns declared volatile. */
3334 if (MEM_VOLATILE_P (x))
3342 fmt = GET_RTX_FORMAT (code);
3343 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3347 int tem = loop_invariant_p (loop, XEXP (x, i));
3353 else if (fmt[i] == 'E')
3356 for (j = 0; j < XVECLEN (x, i); j++)
3358 int tem = loop_invariant_p (loop, XVECEXP (x, i, j));
3368 return 1 + conditional;
3371 /* Return nonzero if all the insns in the loop that set REG
3372 are INSN and the immediately following insns,
3373 and if each of those insns sets REG in an invariant way
3374 (not counting uses of REG in them).
3376 The value is 2 if some of these insns are only conditionally invariant.
3378 We assume that INSN itself is the first set of REG
3379 and that its source is invariant. */
3382 consec_sets_invariant_p (const struct loop *loop, rtx reg, int n_sets,
3385 struct loop_regs *regs = LOOP_REGS (loop);
3387 unsigned int regno = REGNO (reg);
3389 /* Number of sets we have to insist on finding after INSN. */
3390 int count = n_sets - 1;
3391 int old = regs->array[regno].set_in_loop;
3395 /* If N_SETS hit the limit, we can't rely on its value. */
3399 regs->array[regno].set_in_loop = 0;
3407 code = GET_CODE (p);
3409 /* If library call, skip to end of it. */
3410 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
3415 && (set = single_set (p))
3416 && REG_P (SET_DEST (set))
3417 && REGNO (SET_DEST (set)) == regno)
3419 this = loop_invariant_p (loop, SET_SRC (set));
3422 else if ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX)))
3424 /* If this is a libcall, then any invariant REG_EQUAL note is OK.
3425 If this is an ordinary insn, then only CONSTANT_P REG_EQUAL notes are OK. */
3427 this = (CONSTANT_P (XEXP (temp, 0))
3428 || (find_reg_note (p, REG_RETVAL, NULL_RTX)
3429 && loop_invariant_p (loop, XEXP (temp, 0))));
3436 else if (code != NOTE)
3438 regs->array[regno].set_in_loop = old;
3443 regs->array[regno].set_in_loop = old;
3444 /* If loop_invariant_p ever returned 2, we return 2. */
3445 return 1 + (value & 2);
3448 /* Look at all uses (not sets) of registers in X. For each, if it is
3449 the single use, set USAGE[REGNO] to INSN; if there was a previous use in
3450 a different insn, set USAGE[REGNO] to const0_rtx. */
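/* Example with an arbitrary pseudo number: if (reg 100) is used only in
   insn I, single_usage[100] ends up as I; a use in a second insn demotes
   the entry to const0_rtx, which later consumers treat as "used more
   than once".  */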
3453 find_single_use_in_loop (struct loop_regs *regs, rtx insn, rtx x)
3455 enum rtx_code code = GET_CODE (x);
3456 const char *fmt = GET_RTX_FORMAT (code);
3460 regs->array[REGNO (x)].single_usage
3461 = (regs->array[REGNO (x)].single_usage != 0
3462 && regs->array[REGNO (x)].single_usage != insn)
3463 ? const0_rtx : insn;
3465 else if (code == SET)
3467 /* Don't count SET_DEST if it is a REG; otherwise count things
3468 in SET_DEST because if a register is partially modified, it won't
3469 show up as a potential movable so we don't care how USAGE is set for it. */
3471 if (!REG_P (SET_DEST (x)))
3472 find_single_use_in_loop (regs, insn, SET_DEST (x));
3473 find_single_use_in_loop (regs, insn, SET_SRC (x));
3476 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3478 if (fmt[i] == 'e' && XEXP (x, i) != 0)
3479 find_single_use_in_loop (regs, insn, XEXP (x, i));
3480 else if (fmt[i] == 'E')
3481 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3482 find_single_use_in_loop (regs, insn, XVECEXP (x, i, j));
3486 /* Count and record any set in X which is contained in INSN. Update
3487 REGS->array[I].MAY_NOT_OPTIMIZE and LAST_SET for any register I set in X. */
3491 count_one_set (struct loop_regs *regs, rtx insn, rtx x, rtx *last_set)
3493 if (GET_CODE (x) == CLOBBER && REG_P (XEXP (x, 0)))
3494 /* Don't move a reg that has an explicit clobber.
3495 It's not worth the pain to try to do it correctly. */
3496 regs->array[REGNO (XEXP (x, 0))].may_not_optimize = 1;
3498 if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
3500 rtx dest = SET_DEST (x);
3501 while (GET_CODE (dest) == SUBREG
3502 || GET_CODE (dest) == ZERO_EXTRACT
3503 || GET_CODE (dest) == SIGN_EXTRACT
3504 || GET_CODE (dest) == STRICT_LOW_PART)
3505 dest = XEXP (dest, 0);
3509 int regno = REGNO (dest);
3510 for (i = 0; i < LOOP_REGNO_NREGS (regno, dest); i++)
3512 /* If this is the first setting of this reg
3513 in current basic block, and it was set before,
3514 it must be set in two basic blocks, so it cannot
3515 be moved out of the loop. */
3516 if (regs->array[regno].set_in_loop > 0
3517 && last_set[regno] == 0)
3518 regs->array[regno+i].may_not_optimize = 1;
3519 /* If this is not first setting in current basic block,
3520 see if reg was used in between previous one and this.
3521 If so, neither one can be moved. */
3522 if (last_set[regno] != 0
3523 && reg_used_between_p (dest, last_set[regno], insn))
3524 regs->array[regno+i].may_not_optimize = 1;
3525 if (regs->array[regno+i].set_in_loop < 127)
3526 ++regs->array[regno+i].set_in_loop;
3527 last_set[regno+i] = insn;
3533 /* Given a loop that is bounded by LOOP->START and LOOP->END and that
3534 is entered at LOOP->SCAN_START, return 1 if the register set in SET
3535 contained in insn INSN is used by any insn that precedes INSN in
3536 cyclic order starting from the loop entry point.
3538 We don't want to use INSN_LUID here because if we restrict INSN to those
3539 that have a valid INSN_LUID, it means we cannot move an invariant out
3540 from an inner loop past two loops. */
3543 loop_reg_used_before_p (const struct loop *loop, rtx set, rtx insn)
3545 rtx reg = SET_DEST (set);
3548 /* Scan forward checking for register usage. If we hit INSN, we
3549 are done. Otherwise, if we hit LOOP->END, wrap around to LOOP->START. */
3550 for (p = loop->scan_start; p != insn; p = NEXT_INSN (p))
3552 if (INSN_P (p) && reg_overlap_mentioned_p (reg, PATTERN (p)))
3563 /* Information we collect about arrays that we might want to prefetch. */
3564 struct prefetch_info
3566 struct iv_class *class; /* Class this prefetch is based on. */
3567 struct induction *giv; /* GIV this prefetch is based on. */
3568 rtx base_address; /* Start prefetching from this address plus index. */
3570 HOST_WIDE_INT index;
3571 HOST_WIDE_INT stride; /* Prefetch stride in bytes in each iteration. */
3573 unsigned int bytes_accessed; /* Sum of sizes of all accesses to this
3574 prefetch area in one iteration. */
3575 unsigned int total_bytes; /* Total bytes loop will access in this block.
3576 This is set only for loops with known
3577 iteration counts and is 0xffffffff otherwise. */
3579 int prefetch_in_loop; /* Number of prefetch insns in loop. */
3580 int prefetch_before_loop; /* Number of prefetch insns before loop. */
3581 unsigned int write : 1; /* 1 for read/write prefetches. */
3584 /* Data used by check_store function. */
3585 struct check_store_data
3591 static void check_store (rtx, rtx, void *);
3592 static void emit_prefetch_instructions (struct loop *);
3593 static int rtx_equal_for_prefetch_p (rtx, rtx);
3595 /* Set mem_write when mem_address is found. Used as callback to note_stores. */
3598 check_store (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
3600 struct check_store_data *d = (struct check_store_data *) data;
3602 if ((MEM_P (x)) && rtx_equal_p (d->mem_address, XEXP (x, 0)))
3606 /* Like rtx_equal_p, but attempts to swap commutative operands. This is
3607 important to get some addresses combined. Later more sophisticated
3608 transformations can be added when necessary.
3610 ??? The same trick with swapping operands is done at several other places.
3611 It would be nice to develop some common way to handle this. */
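/* Example: (plus (reg 100) (reg 101)) and (plus (reg 101) (reg 100))
   compare equal here even though plain rtx_equal_p would reject the
   pair; the register numbers are purely illustrative.  */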
3614 rtx_equal_for_prefetch_p (rtx x, rtx y)
3618 enum rtx_code code = GET_CODE (x);
3623 if (code != GET_CODE (y))
3626 if (COMMUTATIVE_ARITH_P (x))
3628 return ((rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 0))
3629 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 1)))
3630 || (rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 1))
3631 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 0))));
3634 /* Compare the elements. If any pair of corresponding elements fails to
3635 match, return 0 for the whole thing. */
3637 fmt = GET_RTX_FORMAT (code);
3638 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3643 if (XWINT (x, i) != XWINT (y, i))
3648 if (XINT (x, i) != XINT (y, i))
3653 /* Two vectors must have the same length. */
3654 if (XVECLEN (x, i) != XVECLEN (y, i))
3657 /* And the corresponding elements must match. */
3658 for (j = 0; j < XVECLEN (x, i); j++)
3659 if (rtx_equal_for_prefetch_p (XVECEXP (x, i, j),
3660 XVECEXP (y, i, j)) == 0)
3665 if (rtx_equal_for_prefetch_p (XEXP (x, i), XEXP (y, i)) == 0)
3670 if (strcmp (XSTR (x, i), XSTR (y, i)))
3675 /* These are just backpointers, so they don't matter. */
3681 /* It is believed that rtx's at this level will never
3682 contain anything but integers and other rtx's,
3683 except for within LABEL_REFs and SYMBOL_REFs. */
3691 /* Remove constant addition value from the expression X (when present) and return it. */
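/* Illustrative RTL: for *X == (plus (reg 100) (const_int 16)) the
   function returns 16 and rewrites *X to (reg 100); a bare (const_int 8)
   returns 8 and is replaced by const0_rtx.  */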
3694 static HOST_WIDE_INT
3695 remove_constant_addition (rtx *x)
3697 HOST_WIDE_INT addval = 0;
3700 /* Avoid clobbering a shared CONST expression. */
3701 if (GET_CODE (exp) == CONST)
3703 if (GET_CODE (XEXP (exp, 0)) == PLUS
3704 && GET_CODE (XEXP (XEXP (exp, 0), 0)) == SYMBOL_REF
3705 && GET_CODE (XEXP (XEXP (exp, 0), 1)) == CONST_INT)
3707 *x = XEXP (XEXP (exp, 0), 0);
3708 return INTVAL (XEXP (XEXP (exp, 0), 1));
3713 if (GET_CODE (exp) == CONST_INT)
3715 addval = INTVAL (exp);
3719 /* For plus expression recurse on ourself. */
3720 else if (GET_CODE (exp) == PLUS)
3722 addval += remove_constant_addition (&XEXP (exp, 0));
3723 addval += remove_constant_addition (&XEXP (exp, 1));
3725 /* In case our parameter was constant, remove extra zero from the expression. */
3727 if (XEXP (exp, 0) == const0_rtx)
3729 else if (XEXP (exp, 1) == const0_rtx)
3736 /* Attempt to identify accesses to arrays that are most likely to cause cache
3737 misses, and emit prefetch instructions a few prefetch blocks forward.
3739 To detect the arrays we use the GIV information that was collected by the
3740 strength reduction pass.
3742 The prefetch instructions are generated after the GIV information is done
3743 and before the strength reduction process. The new GIVs are injected into
3744 the strength reduction tables, so the prefetch addresses are optimized as
3747 GIVs are split into base address, stride, and constant addition values.
3748 GIVs with the same address, stride and close addition values are combined
3749 into a single prefetch. Also writes to GIVs are detected, so that prefetch
3750 for write instructions can be used for the block we write to, on machines
3751 that support write prefetches.
3753 Several heuristics are used to determine when to prefetch. They are
3754 controlled by defined symbols that can be overridden for each target. */
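/* A hedged source-level sketch of the combining step (hypothetical loop,
   not the RTL/GIV records actually used):

       for (i = 0; i < n; i++)
         sum += a[i] + a[i + 4];

   Both accesses share the base address of 'a' and the same per-iteration
   stride, and their constant additions differ only by 4 * sizeof (a[0]),
   so a single prefetch stream covers both rather than emitting two.  */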
3757 emit_prefetch_instructions (struct loop *loop)
3759 int num_prefetches = 0;
3760 int num_real_prefetches = 0;
3761 int num_real_write_prefetches = 0;
3762 int num_prefetches_before = 0;
3763 int num_write_prefetches_before = 0;
3766 struct iv_class *bl;
3767 struct induction *iv;
3768 struct prefetch_info info[MAX_PREFETCHES];
3769 struct loop_ivs *ivs = LOOP_IVS (loop);
3774 /* Consider only loops w/o calls. When a call is done, the loop is probably
3775 slow enough to read the memory. */
3776 if (PREFETCH_NO_CALL && LOOP_INFO (loop)->has_call)
3778 if (loop_dump_stream)
3779 fprintf (loop_dump_stream, "Prefetch: ignoring loop: has call.\n");
3784 /* Don't prefetch in loops known to have few iterations. */
3785 if (PREFETCH_NO_LOW_LOOPCNT
3786 && LOOP_INFO (loop)->n_iterations
3787 && LOOP_INFO (loop)->n_iterations <= PREFETCH_LOW_LOOPCNT)
3789 if (loop_dump_stream)
3790 fprintf (loop_dump_stream,
3791 "Prefetch: ignoring loop: not enough iterations.\n");
3795 /* Search all induction variables and pick those interesting for the prefetch machinery. */
3797 for (bl = ivs->list; bl; bl = bl->next)
3799 struct induction *biv = bl->biv, *biv1;
3804 /* Expect all BIVs to be executed in each iteration. This makes our
3805 analysis more conservative. */
3808 /* Discard non-constant additions that we can't handle well yet, and
3809 BIVs that are executed multiple times; such BIVs ought to be
3810 handled in the nested loop. We accept not_every_iteration BIVs,
3811 since these only result in larger strides and make our
3812 heuristics more conservative. */
3813 if (GET_CODE (biv->add_val) != CONST_INT)
3815 if (loop_dump_stream)
3817 fprintf (loop_dump_stream,
3818 "Prefetch: ignoring biv %d: non-constant addition at insn %d:",
3819 REGNO (biv->src_reg), INSN_UID (biv->insn));
3820 print_rtl (loop_dump_stream, biv->add_val);
3821 fprintf (loop_dump_stream, "\n");