1 /* Sign extension elimination optimization for GNU compiler.
2 Copyright (C) 2005 Free Software Foundation, Inc.
3 Contributed by Leehod Baruch <leehod@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 -Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 In order to support 32bit computations on a 64bit machine, sign
25 extension instructions are generated to ensure the correctness of
27 A possible policy (as currently implemented) is to generate a sign
28 extension right after each 32bit computation.
29 Depending on the instruction set of the architecture, some of these
30 sign extension instructions may be redundant.
31 There are two cases in which the extension may be redundant:
34 The instruction that uses the 64bit operands that are sign
35 extended has a dual mode that works with 32bit operands.
45 cmpd a, b --> cmpw a, b //half word compare
48 The instruction that defines the 64bit operand (which is later sign
49 extended) has a dual mode that defines and sign-extends simultaneously
50 a 32bit operand. For example:
54 ld a --> lwa a // load half word and sign extend
60 General idea for solution:
61 --------------------------
62 First, try to merge the sign extension with the instruction that
63 defines the source of the extension and (separately) with the
64 instructions that uses the extended result. By doing this, both cases
65 of redundancies (as described above) will be eliminated.
67 Then, use partial redundancy elimination to place the non redundant
68 ones at optimal placements.
71 Implementation by example:
72 --------------------------
73 Note: The instruction stream is not changed till the last phase.
75 Phase 0: Initial code, as currently generated by gcc.
88 set ((reg:SI 10) (..def1rhs..))
89 set ((reg:DI 100) (sign_extend:DI (reg:SI 10)))
92 set ((reg:DI 100) (const_int 7))
95 set ((reg:SI 20) (..def3rhs..))
96 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
99 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
102 set ((...) (reg:DI 100))
104 Phase 1: Propagate extensions to uses.
119 From here, all of the subregs are lowpart !
121 def1, def2, def3: No change.
124 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
125 set ((reg:CC...) (compare:CC (reg:DI 100) (...)))
128 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
129 set ((...) (reg:DI 100))
132 Phase 2: Merge and eliminate locally redundant extensions.
148 The instructions that were changed at this phase are marked with
152 Remove the sign extension instruction, modify def1 and
153 insert a move instruction to assure to correctness of the code.
154 set ((subreg:SI (reg:DI 100)) (..def1rhs..))
155 set ((reg:SI 10) (subreg:SI (reg:DI 100)))
157 def2 + se: There is no need for merge.
158 Def2 is not changed but a sign extension instruction is
160 set ((reg:DI 100) (const_int 7))
161 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
163 *def3 + se3: Merge succeeded.
164 set ((reg:DI 100) (sign_extend:DI (..def3rhs..)))
165 set ((reg:SI 20) (reg:DI 100))
166 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
167 (The extension instruction is the original one).
169 *use1: Merge succeeded. Remove the sign extension instruction.
171 (compare:CC (subreg:SI (reg:DI 100)) (...)))
173 use2, use3: Merge failed. No change.
175 use4: The extension is locally redundant, therefore it is eliminated
179 Phase 3: Eliminate globally redundant extensions.
181 Following the LCM output:
196 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
199 set ((reg:DI 100) (sign_extend:DI (reg:SI 20)))
202 Phase 4: Commit changes to the insn stream.
205 def1 def3 *def1 def2 *def3
206 se1 def2 se3 [se removed] [se removed]
208 | \ | / | ------> | \ | / |
209 | \ | / | ------> | se | / |
213 use1 use2 use3 *use1 use2 use3
216 The instructions that were changed during the whole optimization are
217 marked with asterisk.
222 [ set ((reg:SI 10) (..def1rhs..)) ] - Deleted
223 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 10))) ] - Deleted
224 set ((subreg:SI (reg:DI 100)) (..def3rhs..)) - Inserted
225 set ((reg:SI 10) (subreg:SI (reg:DI 100))) - Inserted
228 set ((reg:DI 100) (const_int 7)) - No change
231 [ set ((reg:SI 20) (..def3rhs..)) ] - Deleted
232 [ set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) ] - Deleted
233 set ((reg:DI 100) (sign_extend:DI (..def3rhs..))) - Inserted
234 set ((reg:SI 20) (reg:DI 100)) - Inserted
237 [ set ((reg:CC...) (compare:CC (reg:DI 100) (...))) ] - Deleted
238 set ((reg:CC...) - Inserted
239 (compare:CC (subreg:SI (reg:DI 100)) (...)))
242 set ((...) (reg:DI 100)) - No change
245 set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100)))))
247 Note: Most of the simple move instructions that were inserted will be
248 trivially dead and therefore eliminated.
250 The implementation outline:
251 ---------------------------
253 A web is RELEVANT if at the end of phase 1, his leader's
254 relevancy is {ZERO, SIGN}_EXTENDED_DEF. The source_mode of
255 the web is the source_mode of his leader.
256 A definition is a candidate for the optimization if it is part
257 of a RELEVANT web and his local source_mode is not narrower
258 then the source_mode of its web.
259 A use is a candidate for the optimization if it is part of a
261 A simple explicit extension is a single set instruction that
262 extends a register (or a subregister) to a register (or
264 A complex explicit extension is an explicit extension instruction
266 A def extension is a simple explicit extension that is
267 also a candidate for the optimization. This extension is part
268 of the instruction stream, it is not generated by this
270 A use extension is a simple explicit extension that is generated
271 and stored for candidate use during this optimization. It is
272 not emitted to the instruction stream till the last phase of
274 A reference is an instruction that satisfy at least on of these
276 - It contains a definition with EXTENDED_DEF relevancy in a RELEVANT web.
277 - It is followed by a def extension.
278 - It contains a candidate use.
280 Phase 1: Propagate extensions to uses.
281 In this phase, we find candidate extensions for the optimization
282 and we generate (but not emit) proper extensions "right before the
285 a. Build a DF object.
286 b. Traverse over all the instructions that contains a definition
287 and set their local relevancy and local source_mode like this:
288 - If the instruction is a simple explicit extension instruction,
289 mark it as {ZERO, SIGN}_EXTENDED_DEF according to the extension
290 type and mark its source_mode to be the mode of the quantity
291 that is been extended.
292 - Otherwise, If the instruction has an implicit extension,
293 which means that its high part is an extension of its low part,
294 or if it is a complicated explicit extension, mark it as
295 EXTENDED_DEF and set its source_mode to be the narrowest
296 mode that is been extended in the instruction.
297 c. Traverse over all the instructions that contains a use and set
298 their local relevancy to RELEVANT_USE (except for few corner
300 d. Produce the web. During union of two entries, update the
301 relevancy and source_mode of the leader. There are two major
302 guide lines for this update:
303 - If one of the entries is NOT_RELEVANT, mark the leader
305 - If one is ZERO_EXTENDED_DEF and the other is SIGN_EXTENDED_DEF
306 (or vice versa) mark the leader as NOT_RELEVANT. We don't
307 handle this kind of mixed webs.
308 (For more details about this update process,
309 see see_update_leader_extra_info ()).
310 e. Generate uses extensions according to the relevancy and
311 source_mode of the webs.
313 Phase 2: Merge and eliminate locally redundant extensions.
314 In this phase, we try to merge def extensions and use
315 extensions with their references, and eliminate redundant extensions
316 in the same basic block.
318 Traverse over all the references. Do this in basic block number and
319 luid number forward order.
320 For each reference do:
321 a. Peephole optimization - try to merge it with all its
322 def extensions and use extensions in the following
324 - Try to merge only the def extensions, one by one.
325 - Try to merge only the use extensions, one by one.
326 - Try to merge any couple of use extensions simultaneously.
327 - Try to merge any def extension with one or two uses
328 extensions simultaneously.
329 b. Handle each EXTENDED_DEF in it as if it was already merged with
332 During the merge process we save the following data for each
333 register in each basic block:
334 a. The first instruction that defines the register in the basic
336 b. The last instruction that defines the register in the basic
338 c. The first extension of this register before the first
339 instruction that defines it in the basic block.
340 c. The first extension of this register after the last
341 instruction that defines it in the basic block.
342 This data will help us eliminate (or more precisely, not generate)
343 locally redundant extensions, and will be useful in the next stage.
345 While merging extensions with their reference there are 4 possible
347 a. A use extension was merged with the reference:
348 Delete the extension instruction and save the merged reference
349 for phase 4. (For details, see see_use_extension_merged ())
350 b. A use extension failed to be merged with the reference:
351 If there is already such an extension in the same basic block
352 and it is not dead at this point, delete the unmerged extension
353 (it is locally redundant), otherwise properly update the above
355 (For details, see see_merge_one_use_extension ())
356 c. A def extension was merged with the reference:
357 Mark this extension as a merged_def extension and properly
358 update the above basic block data.
359 (For details, see see_merge_one_def_extension ())
360 d. A def extension failed to be merged with the reference:
361 Replace the definition of the NARROWmode register in the
362 reference with the proper subreg of WIDEmode register and save
363 the result as a merged reference. Also, properly update the
364 the above basic block data.
365 (For details, see see_def_extension_not_merged ())
367 Phase 3: Eliminate globally redundant extensions.
368 In this phase, we set the bit vectors input of the edge based LCM
369 using the recorded data on the registers in each basic block.
370 We also save pointers for all the anticipatable and available
371 occurrences of the relevant extensions. Then we run the LCM.
373 a. Initialize the comp, antloc, kill bit vectors to zero and the
374 transp bit vector to ones.
376 b. Traverse over all the references. Do this in basic block number
377 and luid number forward order. For each reference:
378 - Go over all its use extensions. For each such extension -
379 If it is not dead from the beginning of the basic block SET
380 the antloc bit of the current extension in the current
382 If it is not dead till the end of the basic block SET the
383 comp bit of the current extension in the current basic
385 - Go over all its def extensions that were merged with
386 it. For each such extension -
387 If it is not dead till the end of the basic block SET the
388 comp bit of the current extension in the current basic
390 RESET the proper transp and kill bits.
391 - Go over all its def extensions that were not merged
392 with it. For each such extension -
393 RESET the transp bit and SET the kill bit of the current
394 extension in the current basic block bits.
396 c. Run the edge based LCM.
398 Phase 4: Commit changes to the insn stream.
399 This is the only phase that actually changes the instruction stream.
400 Up to this point the optimization could be aborted at any time.
401 Here we insert extensions at their best placements and delete the
402 redundant ones according to the output of the LCM. We also replace
403 some of the instructions according to the second phase merges results.
405 a. Use the pre_delete_map (from the output of the LCM) in order to
406 delete redundant extensions. This will prevent them from been
407 emitted in the first place.
409 b. Insert extensions on edges where needed according to
410 pre_insert_map and edge_list (from the output of the LCM).
412 c. For each reference do-
413 - Emit all the uses extensions that were not deleted until now,
414 right before the reference.
415 - Delete all the merged and unmerged def extensions from
416 the instruction stream.
417 - Replace the reference with the merged one, if exist.
419 The implementation consists of four data structures:
421 Purpose: To handle the relevancy of the uses, definitions and webs.
422 Relevant structures: web_entry (from df.h), see_entry_extra_info.
423 Details: This is a disjoint-set data structure. Most of its functions are
424 implemented in web.c. Each definition and use in the code are
425 elements. A web_entry structure is allocated for each element to
426 hold the element's relevancy and source_mode. The union rules are
427 defined in see_update_leader_extra_info ().
429 Purpose: To store references and their extensions (uses and defs)
430 and to enable traverse over these references according to basic
432 Relevant structure: see_ref_s.
433 Details: This data structure consists of an array of splay trees. One splay
434 tree for each basic block. The splay tree nodes are references and
435 the keys are the luids of the references.
436 A see_ref_s structure is allocated for each reference. It holds the
437 reference itself, its def and uses extensions and later the merged
438 version of the reference.
439 Using this data structure we can traverse over all the references of
440 a basic block and their extensions in forward order.
441 - Data structure III.
442 Purpose: To store local properties of registers for each basic block.
443 This data will later help us build the LCM sbitmap_vectors
445 Relevant structure: see_register_properties.
446 Details: This data structure consists of an array of hash tables. One hash
447 for each basic block. The hash node are a register properties
448 and the keys are the numbers of the registers.
449 A see_register_properties structure is allocated for each register
450 that we might be interested in its properties.
451 Using this data structure we can easily find the properties of a
452 register in a specific basic block. This is necessary for locally
453 redundancy elimination and for setting up the LCM input.
455 Purpose: To store the extensions that are candidate for PRE and their
456 anticipatable and available occurrences.
457 Relevant structure: see_occr, see_pre_extension_expr.
458 Details: This data structure is a hash tables. Its nodes are the extensions
459 that are candidate for PRE.
460 A see_pre_extension_expr structure is allocated for each candidate
461 extension. It holds a copy of the extension and a linked list of all
462 the anticipatable and available occurrences of it.
463 We use this data structure when we read the output of the LCM. */
467 #include "coretypes.h"
474 #include "insn-config.h"
477 #include "splay-tree.h"
481 #include "tree-pass.h"
483 /* Used to classify defs and uses according to relevancy. */
492 /* Used to classify extensions in relevant webs. */
493 enum extension_type {
495 EXPLICIT_DEF_EXTENSION,
496 IMPLICIT_DEF_EXTENSION,
500 /* Global data structures and flags. */
502 /* This structure will be assigned for each web_entry structure (defined
503 in df.h). It is placed in the extra_info field of a web_entry and holds the
504 relevancy and source mode of the web_entry. */
506 struct see_entry_extra_info
508 /* The relevancy of the ref. */
509 enum entry_type relevancy;
510 /* The relevancy of the ref.
511 This field is updated only once - when this structure is created. */
512 enum entry_type local_relevancy;
513 /* The source register mode. */
514 enum machine_mode source_mode;
515 /* This field is used only if the relevancy is ZERO/SIGN_EXTENDED_DEF.
516 It is updated only once when this structure is created. */
517 enum machine_mode local_source_mode;
518 /* This field is used only if the relevancy is EXTENDED_DEF.
519 It holds the narrowest mode that is sign extended. */
520 enum machine_mode source_mode_signed;
521 /* This field is used only if the relevancy is EXTENDED_DEF.
522 It holds the narrowest mode that is zero extended. */
523 enum machine_mode source_mode_unsigned;
526 /* There is one such structure for every reference. It stores the reference
527 itself as well as its extensions (uses and definitions).
528 Used as the value in splay_tree see_bb_splay_ar[]. */
531 /* The luid of the insn. */
533 /* The insn of the ref. */
535 /* The merged insn that was formed from the reference's insn and extensions.
536 If all merges faile it remains NULL. */
538 /* The def extensions of the reference that were not merged with
540 htab_t unmerged_def_se_hash;
541 /* The def extensions of the reference that were merged with
542 it. Implicit extensions of the reference will be stored here too. */
543 htab_t merged_def_se_hash;
544 /* The uses extensions of reference. */
548 /* There is one such structure for every relevant extended register in a
549 specific basic block. This data will help us build the LCM sbitmap_vectors
551 struct see_register_properties
553 /* The register number. */
555 /* The last luid of the reference that defines this register in this basic
558 /* The luid of the reference that has the first extension of this register
559 that appears before any definition in this basic block. */
560 int first_se_before_any_def;
561 /* The luid of the reference that has the first extension of this register
562 that appears after the last definition in this basic block. */
563 int first_se_after_last_def;
566 /* Occurrence of an expression.
567 There must be at most one available occurrence and at most one anticipatable
568 occurrence per basic block. */
571 /* Next occurrence of this expression. */
572 struct see_occr *next;
573 /* The insn that computes the expression. */
578 /* There is one such structure for every relevant extension expression.
579 It holds a copy of this extension instruction as well as a linked lists of
580 pointers to all the antic and avail occurrences of it. */
581 struct see_pre_extension_expr
583 /* A copy of the extension instruction. */
585 /* Index in the available expression bitmaps. */
587 /* List of anticipatable occurrences in basic blocks in the function.
588 An "anticipatable occurrence" is the first occurrence in the basic block,
589 the operands are not modified in the basic block prior to the occurrence
590 and the output is not used between the start of the block and the
592 struct see_occr *antic_occr;
593 /* List of available occurrence in basic blocks in the function.
594 An "available occurrence" is the last occurrence in the basic block and
595 the operands are not modified by following statements in the basic block
596 [including this insn]. */
597 struct see_occr *avail_occr;
600 /* Helper structure for the note_uses and see_replace_src functions. */
601 struct see_replace_data
607 /* Helper structure for the note_uses and see_mentioned_reg functions. */
608 struct see_mentioned_reg_data
614 /* A data flow object that will be created once and used throughout the
616 static struct df *df = NULL;
617 /* An array of web_entries. The i'th definition in the df object is associated
619 static struct web_entry *def_entry = NULL;
620 /* An array of web_entries. The i'th use in the df object is associated with
622 static struct web_entry *use_entry = NULL;
623 /* Array of splay_trees.
624 see_bb_splay_ar[i] refers to the splay tree of the i'th basic block.
625 The splay tree will hold see_ref_s structures. The key is the luid
626 of the insn. This way we can traverse over the references of each basic
627 block in forward or backward order. */
628 static splay_tree *see_bb_splay_ar = NULL;
630 see_bb_hash_ar[i] refers to the hash of the i'th basic block.
631 The hash will hold see_register_properties structure. The key is regno. */
632 static htab_t *see_bb_hash_ar = NULL;
633 /* Hash table that holds a copy of all the extensions. The key is the right
634 hand side of the se_insn field. */
635 static htab_t see_pre_extension_hash = NULL;
637 /* Local LCM properties of expressions. */
638 /* Nonzero for expressions that are transparent in the block. */
639 static sbitmap *transp = NULL;
640 /* Nonzero for expressions that are computed (available) in the block. */
641 static sbitmap *comp = NULL;
642 /* Nonzero for expressions that are locally anticipatable in the block. */
643 static sbitmap *antloc = NULL;
644 /* Nonzero for expressions that are locally killed in the block. */
645 static sbitmap *ae_kill = NULL;
646 /* Nonzero for expressions which should be inserted on a specific edge. */
647 static sbitmap *pre_insert_map = NULL;
648 /* Nonzero for expressions which should be deleted in a specific block. */
649 static sbitmap *pre_delete_map = NULL;
650 /* Contains the edge_list returned by pre_edge_lcm. */
651 static struct edge_list *edge_list = NULL;
652 /* Records the last basic block at the beginning of the optimization. */
654 /* Records the number of uses at the beginning of the optimization. */
655 static unsigned int uses_num;
656 /* Records the number of definitions at the beginning of the optimization. */
657 static unsigned int defs_num;
659 #define ENTRY_EI(ENTRY) ((struct see_entry_extra_info *) (ENTRY)->extra_info)
661 /* Functions implementation. */
663 /* Verifies that EXTENSION's pattern is this:
665 set (reg/subreg reg1) (sign/zero_extend:WIDEmode (reg/subreg reg2))
667 If it doesn't have the expected pattern return NULL.
668 Otherwise, if RETURN_DEST_REG is set, return reg1 else return reg2. */
671 see_get_extension_reg (rtx extension, bool return_dest_reg)
677 set = single_set (extension);
680 lhs = SET_DEST (set);
685 else if (REG_P (SUBREG_REG (lhs)))
686 reg1 = SUBREG_REG (lhs);
690 if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND)
696 else if (REG_P (SUBREG_REG (rhs)))
697 reg2 = SUBREG_REG (rhs);
706 /* Verifies that EXTENSION's pattern is this:
708 set (reg/subreg reg1) (sign/zero_extend: (...expr...)
710 If it doesn't have the expected pattern return UNKNOWN.
711 Otherwise, set SOURCE_MODE to be the mode of the extended expr and return
712 the rtx code of the extension. */
715 see_get_extension_data (rtx extension, enum machine_mode *source_mode)
719 if (!extension || !INSN_P (extension))
722 set = single_set (extension);
726 lhs = SET_DEST (set);
728 /* Don't handle extensions to something other then register or
730 if (!REG_P (lhs) && !SUBREG_REG (lhs))
733 if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND)
736 if (!REG_P (XEXP (rhs, 0))
737 && !(GET_CODE (XEXP (rhs, 0)) == SUBREG
738 && REG_P (SUBREG_REG (XEXP (rhs, 0)))))
741 *source_mode = GET_MODE (XEXP (rhs, 0));
743 if (GET_CODE (rhs) == SIGN_EXTEND)
749 /* Generate instruction with the pattern:
750 set ((reg r) (sign/zero_extend (subreg:mode (reg r))))
751 (the register r on both sides of the set is the same register).
753 If the recognition failed, this is very bad, return NULL (This will abort
754 the entier optimization).
755 Otherwise, return the generated instruction. */
758 see_gen_normalized_extension (rtx reg, enum rtx_code extension_code,
759 enum machine_mode mode)
762 rtx extension = NULL;
766 || (extension_code != SIGN_EXTEND && extension_code != ZERO_EXTEND))
769 subreg = gen_lowpart_SUBREG (mode, reg);
770 if (extension_code == SIGN_EXTEND)
771 extension = gen_rtx_SIGN_EXTEND (GET_MODE (reg), subreg);
773 extension = gen_rtx_ZERO_EXTEND (GET_MODE (reg), subreg);
776 emit_insn (gen_rtx_SET (VOIDmode, reg, extension));
780 if (insn_invalid_p (insn))
781 /* Recognition failed, this is very bad for this optimization.
782 Abort the optimization. */
787 /* Hashes and splay_trees related functions implementation. */
789 /* Helper functions for the pre_extension hash.
790 This kind of hash will hold see_pre_extension_expr structures.
792 The key is the right hand side of the se_insn field.
793 Note that the se_insn is an expression that looks like:
795 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
796 (subreg:NARROWmode (reg:WIDEmode r2)))) */
798 /* Return TRUE if P1 has the same value in its rhs as P2.
799 Otherwise, return FALSE.
800 P1 and P2 are see_pre_extension_expr structures. */
803 eq_descriptor_pre_extension (const void *p1, const void *p2)
805 const struct see_pre_extension_expr *extension1 = p1;
806 const struct see_pre_extension_expr *extension2 = p2;
807 rtx set1 = single_set (extension1->se_insn);
808 rtx set2 = single_set (extension2->se_insn);
811 gcc_assert (set1 && set2);
812 rhs1 = SET_SRC (set1);
813 rhs2 = SET_SRC (set2);
815 return rtx_equal_p (rhs1, rhs2);
819 /* P is a see_pre_extension_expr struct, use the RHS of the se_insn field.
820 Note that the RHS is an expression that looks like this:
821 (sign_extend:WIDEmode (subreg:NARROWmode (reg:WIDEmode r))) */
824 hash_descriptor_pre_extension (const void *p)
826 const struct see_pre_extension_expr *extension = p;
827 rtx set = single_set (extension->se_insn);
833 return hash_rtx (rhs, GET_MODE (rhs), 0, NULL, 0);
837 /* Free the allocated memory of the current see_pre_extension_expr struct.
839 It frees the two linked list of the occurrences structures. */
842 hash_del_pre_extension (void *p)
844 struct see_pre_extension_expr *extension = p;
845 struct see_occr *curr_occr = extension->antic_occr;
846 struct see_occr *next_occr = NULL;
848 /* Free the linked list of the anticipatable occurrences. */
851 next_occr = curr_occr->next;
853 curr_occr = next_occr;
856 /* Free the linked list of the available occurrences. */
857 curr_occr = extension->avail_occr;
860 next_occr = curr_occr->next;
862 curr_occr = next_occr;
865 /* Free the see_pre_extension_expr structure itself. */
870 /* Helper functions for the register_properties hash.
871 This kind of hash will hold see_register_properties structures.
873 The value of the key is the regno field of the structure. */
875 /* Return TRUE if P1 has the same value in the regno field as P2.
876 Otherwise, return FALSE.
877 Where P1 and P2 are see_register_properties structures. */
880 eq_descriptor_properties (const void *p1, const void *p2)
882 const struct see_register_properties *curr_prop1 = p1;
883 const struct see_register_properties *curr_prop2 = p2;
885 return curr_prop1->regno == curr_prop2->regno;
889 /* P is a see_register_properties struct, use the register number in the
893 hash_descriptor_properties (const void *p)
895 const struct see_register_properties *curr_prop = p;
896 return curr_prop->regno;
900 /* Free the allocated memory of the current see_register_properties struct. */
902 hash_del_properties (void *p)
904 struct see_register_properties *curr_prop = p;
909 /* Helper functions for an extension hash.
910 This kind of hash will hold insns that look like:
912 set ((reg:WIDEmode r1) (sign_extend:WIDEmode
913 (subreg:NARROWmode (reg:WIDEmode r2))))
915 set ((reg:WIDEmode r1) (sign_extend:WIDEmode (reg:NARROWmode r2)))
917 The value of the key is (REGNO (reg:WIDEmode r1))
918 It is possible to search this hash in two ways:
919 1. By a register rtx. The Value that is been compared to the keys is the
921 2. By an insn with the above pattern. The Value that is been compared to
922 the keys is the REGNO of the reg on the lhs. */
924 /* Return TRUE if P1 has the same value as P2. Otherwise, return FALSE.
925 Where P1 is an insn and P2 is an insn or a register. */
928 eq_descriptor_extension (const void *p1, const void *p2)
930 const rtx insn = (rtx) p1;
931 const rtx element = (rtx) p2;
932 rtx set1 = single_set (insn);
935 rtx dest_reg2 = NULL;
937 gcc_assert (set1 && element && (REG_P (element) || INSN_P (element)));
939 dest_reg1 = SET_DEST (set1);
941 if (INSN_P (element))
943 set2 = single_set (element);
944 dest_reg2 = SET_DEST (set2);
949 return REGNO (dest_reg1) == REGNO (dest_reg2);
953 /* If P is an insn, use the register number of its lhs
954 otherwise, P is a register, use its number. */
957 hash_descriptor_extension (const void *p)
959 const rtx r = (rtx) p;
965 gcc_assert (r && INSN_P (r));
966 set = single_set (r);
968 lhs = SET_DEST (set);
973 /* Helper function for a see_bb_splay_ar[i] splay tree.
974 It frees all the allocated memory of a struct see_ref_s pointer.
976 VALUE is the value of a splay tree node. */
979 see_free_ref_s (splay_tree_value value)
981 struct see_ref_s *ref_s = (struct see_ref_s *)value;
983 if (ref_s->unmerged_def_se_hash)
984 htab_delete (ref_s->unmerged_def_se_hash);
985 if (ref_s->merged_def_se_hash)
986 htab_delete (ref_s->merged_def_se_hash);
987 if (ref_s->use_se_hash)
988 htab_delete (ref_s->use_se_hash);
993 /* Rest of the implementation. */
995 /* Search the extension hash for a suitable entry for EXTENSION.
996 TYPE is the type of EXTENSION (USE_EXTENSION or DEF_EXTENSION).
998 If TYPE is DEF_EXTENSION we need to normalize EXTENSION before searching the
1001 If a suitable entry was found, return the slot. Otherwise, store EXTENSION
1002 in the hash and return NULL. */
1004 static struct see_pre_extension_expr *
1005 see_seek_pre_extension_expr (rtx extension, enum extension_type type)
1007 struct see_pre_extension_expr **slot_pre_exp, temp_pre_exp;
1008 rtx dest_extension_reg = see_get_extension_reg (extension, 1);
1009 enum rtx_code extension_code;
1010 enum machine_mode source_extension_mode;
1012 if (type == DEF_EXTENSION)
1014 extension_code = see_get_extension_data (extension,
1015 &source_extension_mode);
1016 gcc_assert (extension_code != UNKNOWN);
1018 see_gen_normalized_extension (dest_extension_reg, extension_code,
1019 source_extension_mode);
1021 temp_pre_exp.se_insn = extension;
1023 (struct see_pre_extension_expr **) htab_find_slot (see_pre_extension_hash,
1024 &temp_pre_exp, INSERT);
1025 if (*slot_pre_exp == NULL)
1026 /* This is the first time this extension instruction is encountered. Store
1029 (*slot_pre_exp) = xmalloc (sizeof (struct see_pre_extension_expr));
1030 (*slot_pre_exp)->se_insn = extension;
1031 (*slot_pre_exp)->bitmap_index =
1032 (htab_elements (see_pre_extension_hash) - 1);
1033 (*slot_pre_exp)->antic_occr = NULL;
1034 (*slot_pre_exp)->avail_occr = NULL;
1037 return *slot_pre_exp;
1041 /* This function defines how to update the extra_info of the web_entry.
1043 FIRST is the pointer of the extra_info of the first web_entry.
1044 SECOND is the pointer of the extra_info of the second web_entry.
1045 The first web_entry will be the predecessor (leader) of the second web_entry
1048 Return true if FIRST and SECOND points to the same web entry structure and
1049 nothing is done. Otherwise, return false. */
1052 see_update_leader_extra_info (struct web_entry *first, struct web_entry *second)
1054 struct see_entry_extra_info *first_ei, *second_ei;
1056 first = unionfind_root (first);
1057 second = unionfind_root (second);
1059 if (unionfind_union (first, second))
1062 first_ei = (struct see_entry_extra_info *) first->extra_info;
1063 second_ei = (struct see_entry_extra_info *) second->extra_info;
1065 gcc_assert (first_ei && second_ei);
1067 if (second_ei->relevancy == NOT_RELEVANT)
1069 first_ei->relevancy = NOT_RELEVANT;
1072 switch (first_ei->relevancy)
1077 switch (second_ei->relevancy)
1082 first_ei->relevancy = second_ei->relevancy;
1083 first_ei->source_mode_signed = second_ei->source_mode_signed;
1084 first_ei->source_mode_unsigned = second_ei->source_mode_unsigned;
1086 case SIGN_EXTENDED_DEF:
1087 case ZERO_EXTENDED_DEF:
1088 first_ei->relevancy = second_ei->relevancy;
1089 first_ei->source_mode = second_ei->source_mode;
1095 case SIGN_EXTENDED_DEF:
1096 switch (second_ei->relevancy)
1098 case SIGN_EXTENDED_DEF:
1099 /* The mode of the root should be the wider one in this case. */
1100 first_ei->source_mode =
1101 (first_ei->source_mode > second_ei->source_mode) ?
1102 first_ei->source_mode : second_ei->source_mode;
1106 case ZERO_EXTENDED_DEF:
1107 /* Don't mix webs with zero extend and sign extend. */
1108 first_ei->relevancy = NOT_RELEVANT;
1111 if (second_ei->source_mode_signed == MAX_MACHINE_MODE)
1112 first_ei->relevancy = NOT_RELEVANT;
1114 /* The mode of the root should be the wider one in this case. */
1115 first_ei->source_mode =
1116 (first_ei->source_mode > second_ei->source_mode_signed) ?
1117 first_ei->source_mode : second_ei->source_mode_signed;
1123 /* This case is similar to the previous one, with little changes. */
1124 case ZERO_EXTENDED_DEF:
1125 switch (second_ei->relevancy)
1127 case SIGN_EXTENDED_DEF:
1128 /* Don't mix webs with zero extend and sign extend. */
1129 first_ei->relevancy = NOT_RELEVANT;
1133 case ZERO_EXTENDED_DEF:
1134 /* The mode of the root should be the wider one in this case. */
1135 first_ei->source_mode =
1136 (first_ei->source_mode > second_ei->source_mode) ?
1137 first_ei->source_mode : second_ei->source_mode;
1140 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE)
1141 first_ei->relevancy = NOT_RELEVANT;
1143 /* The mode of the root should be the wider one in this case. */
1144 first_ei->source_mode =
1145 (first_ei->source_mode > second_ei->source_mode_unsigned) ?
1146 first_ei->source_mode : second_ei->source_mode_unsigned;
1153 if (first_ei->source_mode_signed != MAX_MACHINE_MODE
1154 && first_ei->source_mode_unsigned != MAX_MACHINE_MODE)
1156 switch (second_ei->relevancy)
1158 case SIGN_EXTENDED_DEF:
1159 first_ei->relevancy = SIGN_EXTENDED_DEF;
1160 first_ei->source_mode =
1161 (first_ei->source_mode_signed > second_ei->source_mode) ?
1162 first_ei->source_mode_signed : second_ei->source_mode;
1166 case ZERO_EXTENDED_DEF:
1167 first_ei->relevancy = ZERO_EXTENDED_DEF;
1168 first_ei->source_mode =
1169 (first_ei->source_mode_unsigned > second_ei->source_mode) ?
1170 first_ei->source_mode_unsigned : second_ei->source_mode;
1173 if (second_ei->source_mode_unsigned != MAX_MACHINE_MODE)
1174 first_ei->source_mode_unsigned =
1175 (first_ei->source_mode_unsigned >
1176 second_ei->source_mode_unsigned) ?
1177 first_ei->source_mode_unsigned :
1178 second_ei->source_mode_unsigned;
1179 if (second_ei->source_mode_signed != MAX_MACHINE_MODE)
1180 first_ei->source_mode_signed =
1181 (first_ei->source_mode_signed >
1182 second_ei->source_mode_signed) ?
1183 first_ei->source_mode_signed : second_ei->source_mode_signed;
1189 else if (first_ei->source_mode_signed == MAX_MACHINE_MODE)
1191 gcc_assert (first_ei->source_mode_unsigned != MAX_MACHINE_MODE);
1192 switch (second_ei->relevancy)
1194 case SIGN_EXTENDED_DEF:
1195 first_ei->relevancy = NOT_RELEVANT;
1199 case ZERO_EXTENDED_DEF:
1200 first_ei->relevancy = ZERO_EXTENDED_DEF;
1201 first_ei->source_mode =
1202 (first_ei->source_mode_unsigned > second_ei->source_mode) ?
1203 first_ei->source_mode_unsigned : second_ei->source_mode;
1206 if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE)
1207 first_ei->relevancy = NOT_RELEVANT;
1209 first_ei->source_mode_unsigned =
1210 (first_ei->source_mode_unsigned >
1211 second_ei->source_mode_unsigned) ?
1212 first_ei->source_mode_unsigned :
1213 second_ei->source_mode_unsigned;
1221 gcc_assert (first_ei->source_mode_unsigned == MAX_MACHINE_MODE);
1222 gcc_assert (first_ei->source_mode_signed != MAX_MACHINE_MODE);
1223 switch (second_ei->relevancy)
1225 case SIGN_EXTENDED_DEF:
1226 first_ei->relevancy = SIGN_EXTENDED_DEF;
1227 first_ei->source_mode =
1228 (first_ei->source_mode_signed > second_ei->source_mode) ?
1229 first_ei->source_mode_signed : second_ei->source_mode;
1233 case ZERO_EXTENDED_DEF:
1234 first_ei->relevancy = NOT_RELEVANT;
1237 if (second_ei->source_mode_signed == MAX_MACHINE_MODE)
1238 first_ei->relevancy = NOT_RELEVANT;
1240 first_ei->source_mode_signed =
1241 (first_ei->source_mode_signed >
1242 second_ei->source_mode_signed) ?
1243 first_ei->source_mode_signed : second_ei->source_mode_signed;
1251 /* Unknown patern type. */
1259 /* Free global data structures. */
1262 see_free_data_structures (void)
1267 /* Free the bitmap vectors. */
1270 sbitmap_vector_free (transp);
1272 sbitmap_vector_free (comp);
1274 sbitmap_vector_free (antloc);
1276 sbitmap_vector_free (ae_kill);
1281 sbitmap_vector_free (pre_insert_map);
1282 pre_insert_map = NULL;
1286 sbitmap_vector_free (pre_delete_map);
1287 pre_delete_map = NULL;
1291 free_edge_list (edge_list);
1295 /* Free the extension hash. */
1296 htab_delete (see_pre_extension_hash);
1298 /* Free the array of hashes. */
1299 for (i = 0; i < last_bb; i++)
1300 if (see_bb_hash_ar[i])
1301 htab_delete (see_bb_hash_ar[i]);
1302 free (see_bb_hash_ar);
1304 /* Free the array of splay trees. */
1305 for (i = 0; i < last_bb; i++)
1306 if (see_bb_splay_ar[i])
1307 splay_tree_delete (see_bb_splay_ar[i]);
1308 free (see_bb_splay_ar);
1310 /* Free the array of web entries and their extra info field. */
1311 for (j = 0; j < defs_num; j++)
1312 free (def_entry[j].extra_info);
1314 for (j = 0; j < uses_num; j++)
1315 free (use_entry[j].extra_info);
1320 /* Initialize global data structures and variables. */
1323 see_initialize_data_structures (void)
1325 /* Build the df object. */
1326 df = df_init (DF_HARD_REGS | DF_EQUIV_NOTES | DF_SUBREGS);
1327 df_rd_add_problem (df, 0);
1328 df_chain_add_problem (df, DF_DU_CHAIN | DF_UD_CHAIN);
1332 df_dump (df, dump_file);
1334 /* Record the last basic block at the beginning of the optimization. */
1335 last_bb = last_basic_block;
1336 /* Record the number of uses at the beginning of the optimization. */
1337 uses_num = DF_USES_SIZE (df);
1338 /* Record the number of definitions at the beginning of the optimization. */
1339 defs_num = DF_DEFS_SIZE (df);
1341 /* Allocate web entries array for the union-find data structure. */
1342 def_entry = xcalloc (defs_num, sizeof (struct web_entry));
1343 use_entry = xcalloc (uses_num, sizeof (struct web_entry));
1345 /* Allocate an array of splay trees.
1346 One splay tree for each basic block. */
1347 see_bb_splay_ar = xcalloc (last_bb, sizeof (splay_tree));
1349 /* Allocate an array of hashes.
1350 One hash for each basic block. */
1351 see_bb_hash_ar = xcalloc (last_bb, sizeof (htab_t));
1353 /* Allocate the extension hash. It will hold the extensions that we want
1355 see_pre_extension_hash = htab_create (10,
1356 hash_descriptor_pre_extension,
1357 eq_descriptor_pre_extension,
1358 hash_del_pre_extension);
1362 /* Function called by note_uses to check if a register is used in a
1365 X is a pointer to the subexpression and DATA is a pointer to a
1366 see_mentioned_reg_data structure that contains the register to look for and
1367 a place for the result. */
1370 see_mentioned_reg (rtx *x, void *data)
1372 struct see_mentioned_reg_data *d
1373 = (struct see_mentioned_reg_data *) data;
1375 if (reg_mentioned_p (d->reg, *x))
1376 d->mentioned = true;
1380 /* We don't want to merge a use extension with a reference if the extended
1381 register is used only in a simple move instruction. We also don't want to
1382 merge a def extension with a reference if the source register of the
1383 extension is defined only in a simple move in the reference.
1385 REF is the reference instruction.
1386 EXTENSION is the use extension or def extension instruction.
1387 TYPE is the type of the extension (use or def).
1389 Return true if the reference is complicated enough, so we would like to merge
1390 it with the extension. Otherwise, return false. */
1393 see_want_to_be_merged_with_extension (rtx ref, rtx extension,
1394 enum extension_type type)
1397 rtx dest_extension_reg = see_get_extension_reg (extension, 1);
1398 rtx source_extension_reg = see_get_extension_reg (extension, 0);
1400 struct see_mentioned_reg_data d;
1403 pat = PATTERN (ref);
1404 code = GET_CODE (pat);
1406 if (code == PARALLEL)
1408 for (i = 0; i < XVECLEN (pat, 0); i++)
1410 rtx sub = XVECEXP (pat, 0, i);
1412 if (GET_CODE (sub) == SET
1413 && (REG_P (SET_DEST (sub))
1414 || (GET_CODE (SET_DEST (sub)) == SUBREG
1415 && REG_P (SUBREG_REG (SET_DEST (sub)))))
1416 && (REG_P (SET_SRC (sub))
1417 || (GET_CODE (SET_SRC (sub)) == SUBREG
1418 && REG_P (SUBREG_REG (SET_SRC (sub))))))
1420 /* This is a simple move SET. */
1421 if (type == DEF_EXTENSION
1422 && reg_mentioned_p (source_extension_reg, SET_DEST (sub)))
1427 /* This is not a simple move SET.
1428 Check if it uses the source of the extension. */
1429 if (type == USE_EXTENSION)
1431 d.reg = dest_extension_reg;
1432 d.mentioned = false;
1433 note_uses (&sub, see_mentioned_reg, &d);
1439 if (type == USE_EXTENSION)
1445 && (REG_P (SET_DEST (pat))
1446 || (GET_CODE (SET_DEST (pat)) == SUBREG
1447 && REG_P (SUBREG_REG (SET_DEST (pat)))))
1448 && (REG_P (SET_SRC (pat))
1449 || (GET_CODE (SET_SRC (pat)) == SUBREG
1450 && REG_P (SUBREG_REG (SET_SRC (pat))))))
1451 /* This is a simple move SET. */
1459 /* Print the register number of the current see_register_properties
1462 This is a subroutine of see_main called via htab_traverse.
1463 SLOT contains the current see_register_properties structure pointer. */
1466 see_print_register_properties (void **slot, void *b ATTRIBUTE_UNUSED)
1468 struct see_register_properties *prop = *slot;
1471 fprintf (dump_file, "Property found for register %d\n", prop->regno);
1476 /* Print the extension instruction of the current see_register_properties
1479 This is a subroutine of see_main called via htab_traverse.
1480 SLOT contains the current see_pre_extension_expr structure pointer. */
1483 see_print_pre_extension_expr (void **slot, void *b ATTRIBUTE_UNUSED)
1485 struct see_pre_extension_expr *pre_extension = *slot;
1487 gcc_assert (pre_extension
1488 && pre_extension->se_insn
1489 && INSN_P (pre_extension->se_insn));
1491 fprintf (dump_file, "Index %d for:\n", pre_extension->bitmap_index);
1492 print_rtl_single (dump_file, pre_extension->se_insn);
1498 /* Phase 4 implementation: Commit changes to the insn stream. */
1500 /* Delete the merged def extension.
1502 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1504 SLOT contains the current def extension instruction.
1505 B is the see_ref_s structure pointer. */
1508 see_delete_merged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1514 fprintf (dump_file, "Deleting merged def extension:\n");
1515 print_rtl_single (dump_file, def_se);
1518 if (INSN_DELETED_P (def_se))
1519 /* This def extension is an implicit one. No need to delete it since
1520 it is not in the insn stream. */
1523 delete_insn (def_se);
1528 /* Delete the unmerged def extension.
1530 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1532 SLOT contains the current def extension instruction.
1533 B is the see_ref_s structure pointer. */
1536 see_delete_unmerged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1542 fprintf (dump_file, "Deleting unmerged def extension:\n");
1543 print_rtl_single (dump_file, def_se);
1546 delete_insn (def_se);
1551 /* Emit the non-redundant use extension to the instruction stream.
1553 This is a subroutine of see_commit_ref_changes called via htab_traverse.
1555 SLOT contains the current use extension instruction.
1556 B is the see_ref_s structure pointer. */
1559 see_emit_use_extension (void **slot, void *b)
1562 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1564 if (INSN_DELETED_P (use_se))
1565 /* This use extension was previously removed according to the lcm
1571 fprintf (dump_file, "Inserting use extension:\n");
1572 print_rtl_single (dump_file, use_se);
1575 add_insn_before (use_se, curr_ref_s->insn);
1581 /* For each relevant reference:
1582 a. Emit the non-redundant use extensions.
1583 b. Delete the def extensions.
1584 c. Replace the original reference with the merged one (if exists) and add the
1585 move instructions that were generated.
1587 This is a subroutine of see_commit_changes called via splay_tree_foreach.
1589 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
1590 see_ref_s structure. */
1593 see_commit_ref_changes (splay_tree_node stn,
1594 void *data ATTRIBUTE_UNUSED)
1596 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
1597 htab_t unmerged_def_se_hash =
1598 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
1599 htab_t merged_def_se_hash =
1600 ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
1601 rtx ref = ((struct see_ref_s *) (stn->value))->insn;
1602 rtx merged_ref = ((struct see_ref_s *) (stn->value))->merged_insn;
1604 /* Emit the non-redundant use extensions. */
1606 htab_traverse_noresize (use_se_hash, see_emit_use_extension,
1607 (PTR) (stn->value));
1609 /* Delete the def extensions. */
1610 if (unmerged_def_se_hash)
1611 htab_traverse (unmerged_def_se_hash, see_delete_unmerged_def_extension,
1612 (PTR) (stn->value));
1614 if (merged_def_se_hash)
1615 htab_traverse (merged_def_se_hash, see_delete_merged_def_extension,
1616 (PTR) (stn->value));
1618 /* Replace the original reference with the merged one (if exists) and add the
1619 move instructions that were generated. */
1620 if (merged_ref && !INSN_DELETED_P (ref))
1624 fprintf (dump_file, "Replacing orig reference:\n");
1625 print_rtl_single (dump_file, ref);
1626 fprintf (dump_file, "With merged reference:\n");
1627 print_rtl_single (dump_file, merged_ref);
1629 emit_insn_after (merged_ref, ref);
1633 /* Continue to the next reference. */
1638 /* Insert partially redundant expressions on edges to make the expressions fully
1641 INDEX_MAP is a mapping of an index to an expression.
1642 Return true if an instruction was inserted on an edge.
1643 Otherwise, return false. */
1646 see_pre_insert_extensions (struct see_pre_extension_expr **index_map)
1648 int num_edges = NUM_EDGES (edge_list);
1649 int set_size = pre_insert_map[0]->size;
1650 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
1657 for (e = 0; e < num_edges; e++)
1660 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
1662 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
1664 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
1666 for (j = indx; insert && j < (int) pre_extension_num;
1670 struct see_pre_extension_expr *expr = index_map[j];
1671 int idx = expr->bitmap_index;
1673 edge eg = INDEX_EDGE (edge_list, e);
1676 emit_insn (PATTERN (expr->se_insn));
1677 se_insn = get_insns ();
1680 if (eg->flags & EDGE_ABNORMAL)
1682 rtx new_insn = NULL;
1684 new_insn = insert_insn_end_bb_new (se_insn, bb);
1685 gcc_assert (new_insn && INSN_P (new_insn));
1690 "PRE: end of bb %d, insn %d, ",
1691 bb->index, INSN_UID (new_insn));
1693 "inserting expression %d\n", idx);
1698 insert_insn_on_edge (se_insn, eg);
1702 fprintf (dump_file, "PRE: edge (%d,%d), ",
1704 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
1705 fprintf (dump_file, "inserting expression %d\n", idx);
1716 /* Since all the redundant extensions must be anticipatable, they must be a use
1717 extensions. Mark them as deleted. This will prevent them from been emitted
1720 This is a subroutine of see_commit_changes called via htab_traverse.
1722 SLOT contains the current see_pre_extension_expr structure pointer. */
1725 see_pre_delete_extension (void **slot, void *b ATTRIBUTE_UNUSED)
1727 struct see_pre_extension_expr *expr = *slot;
1728 struct see_occr *occr;
1729 int indx = expr->bitmap_index;
1731 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
1733 if (TEST_BIT (pre_delete_map[occr->block_num], indx))
1735 /* Mark as deleted. */
1736 INSN_DELETED_P (occr->insn) = 1;
1739 fprintf (dump_file,"Redundant extension deleted:\n");
1740 print_rtl_single (dump_file, occr->insn);
1748 /* Create the index_map mapping of an index to an expression.
1750 This is a subroutine of see_commit_changes called via htab_traverse.
1752 SLOT contains the current see_pre_extension_expr structure pointer.
1753 B a pointer to see_pre_extension_expr structure pointer. */
1756 see_map_extension (void **slot, void *b)
1758 struct see_pre_extension_expr *expr = *slot;
1759 struct see_pre_extension_expr **index_map =
1760 (struct see_pre_extension_expr **) b;
1762 index_map[expr->bitmap_index] = expr;
1768 /* Phase 4 top level function.
1769 In this phase we finally change the instruction stream.
1770 Here we insert extensions at their best placements and delete the
1771 redundant ones according to the output of the LCM. We also replace
1772 some of the instructions according to phase 2 merges results. */
1775 see_commit_changes (void)
1777 struct see_pre_extension_expr **index_map;
1778 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
1779 bool did_insert = false;
1782 index_map = xcalloc (pre_extension_num,
1783 sizeof (struct see_pre_extension_expr *));
1787 "* Phase 4: Commit changes to the insn stream. *\n");
1789 /* Produce a mapping of all the pre_extensions. */
1790 htab_traverse (see_pre_extension_hash, see_map_extension, (PTR) index_map);
1792 /* Delete redundant extension. This will prevent them from been emitted in
1794 htab_traverse (see_pre_extension_hash, see_pre_delete_extension, NULL);
1796 /* At this point, we must free the DF object, since the number of basic blocks
1801 /* Insert extensions on edges, according to the LCM result. */
1802 did_insert = see_pre_insert_extensions (index_map);
1805 commit_edge_insertions ();
1807 /* Commit the rest of the changes. */
1808 for (i = 0; i < last_bb; i++)
1810 if (see_bb_splay_ar[i])
1812 /* Traverse over all the references in the basic block in forward
1814 splay_tree_foreach (see_bb_splay_ar[i],
1815 see_commit_ref_changes, NULL);
1823 /* Phase 3 implementation: Eliminate globally redundant extensions. */
1825 /* Analyze the properties of a merged def extension for the LCM and record avail
1828 This is a subroutine of see_analyze_ref_local_prop called
1831 SLOT contains the current def extension instruction.
1832 B is the see_ref_s structure pointer. */
1835 see_analyze_merged_def_local_prop (void **slot, void *b)
1838 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1839 rtx ref = curr_ref_s->insn;
1840 struct see_pre_extension_expr *extension_expr;
1842 int bb_num = BLOCK_NUM (ref);
1843 htab_t curr_bb_hash;
1844 struct see_register_properties *curr_prop, **slot_prop;
1845 struct see_register_properties temp_prop;
1846 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
1847 struct see_occr *curr_occr = NULL;
1848 struct see_occr *tmp_occr = NULL;
1850 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
1851 /* The extension_expr must be found. */
1852 gcc_assert (extension_expr);
1854 curr_bb_hash = see_bb_hash_ar[bb_num];
1855 gcc_assert (curr_bb_hash);
1856 temp_prop.regno = REGNO (dest_extension_reg);
1858 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1859 &temp_prop, INSERT);
1860 curr_prop = *slot_prop;
1861 gcc_assert (curr_prop);
1863 indx = extension_expr->bitmap_index;
1865 /* Reset the transparency bit. */
1866 RESET_BIT (transp[bb_num], indx);
1867 /* Reset the killed bit. */
1868 RESET_BIT (ae_kill[bb_num], indx);
1870 if (curr_prop->first_se_after_last_def == DF_INSN_LUID (df, ref))
1872 /* Set the available bit. */
1873 SET_BIT (comp[bb_num], indx);
1874 /* Record the available occurrence. */
1875 curr_occr = xmalloc (sizeof (struct see_occr));
1876 curr_occr->next = NULL;
1877 curr_occr->insn = def_se;
1878 curr_occr->block_num = bb_num;
1879 tmp_occr = extension_expr->avail_occr;
1881 extension_expr->avail_occr = curr_occr;
1884 while (tmp_occr->next)
1885 tmp_occr = tmp_occr->next;
1886 tmp_occr->next = curr_occr;
1894 /* Analyze the properties of a unmerged def extension for the LCM.
1896 This is a subroutine of see_analyze_ref_local_prop called
1899 SLOT contains the current def extension instruction.
1900 B is the see_ref_s structure pointer. */
1903 see_analyze_unmerged_def_local_prop (void **slot, void *b)
1906 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1907 rtx ref = curr_ref_s->insn;
1908 struct see_pre_extension_expr *extension_expr;
1910 int bb_num = BLOCK_NUM (ref);
1911 htab_t curr_bb_hash;
1912 struct see_register_properties *curr_prop, **slot_prop;
1913 struct see_register_properties temp_prop;
1914 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
1916 extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
1917 /* The extension_expr must be found. */
1918 gcc_assert (extension_expr);
1920 curr_bb_hash = see_bb_hash_ar[bb_num];
1921 gcc_assert (curr_bb_hash);
1922 temp_prop.regno = REGNO (dest_extension_reg);
1924 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1925 &temp_prop, INSERT);
1926 curr_prop = *slot_prop;
1927 gcc_assert (curr_prop);
1929 indx = extension_expr->bitmap_index;
1931 /* Reset the transparency bit. */
1932 RESET_BIT (transp[bb_num], indx);
1933 /* Set the killed bit. */
1934 SET_BIT (ae_kill[bb_num], indx);
1940 /* Analyze the properties of a use extension for the LCM and record anic and
1943 This is a subroutine of see_analyze_ref_local_prop called
1946 SLOT contains the current use extension instruction.
1947 B is the see_ref_s structure pointer. */
1950 see_analyze_use_local_prop (void **slot, void *b)
1952 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
1954 rtx ref = curr_ref_s->insn;
1955 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
1956 struct see_pre_extension_expr *extension_expr;
1957 struct see_register_properties *curr_prop, **slot_prop;
1958 struct see_register_properties temp_prop;
1959 struct see_occr *curr_occr = NULL;
1960 struct see_occr *tmp_occr = NULL;
1961 htab_t curr_bb_hash;
1963 int bb_num = BLOCK_NUM (ref);
1965 extension_expr = see_seek_pre_extension_expr (use_se, USE_EXTENSION);
1966 /* The extension_expr must be found. */
1967 gcc_assert (extension_expr);
1969 curr_bb_hash = see_bb_hash_ar[bb_num];
1970 gcc_assert (curr_bb_hash);
1971 temp_prop.regno = REGNO (dest_extension_reg);
1973 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
1974 &temp_prop, INSERT);
1975 curr_prop = *slot_prop;
1976 gcc_assert (curr_prop);
1978 indx = extension_expr->bitmap_index;
1980 if (curr_prop->first_se_before_any_def == DF_INSN_LUID (df, ref))
1982 /* Set the anticipatable bit. */
1983 SET_BIT (antloc[bb_num], indx);
1984 /* Record the anticipatable occurrence. */
1985 curr_occr = xmalloc (sizeof (struct see_occr));
1986 curr_occr->next = NULL;
1987 curr_occr->insn = use_se;
1988 curr_occr->block_num = bb_num;
1989 tmp_occr = extension_expr->antic_occr;
1991 extension_expr->antic_occr = curr_occr;
1994 while (tmp_occr->next)
1995 tmp_occr = tmp_occr->next;
1996 tmp_occr->next = curr_occr;
1998 if (curr_prop->last_def < 0)
2000 /* Set the available bit. */
2001 SET_BIT (comp[bb_num], indx);
2002 /* Record the available occurrence. */
2003 curr_occr = xmalloc (sizeof (struct see_occr));
2004 curr_occr->next = NULL;
2005 curr_occr->insn = use_se;
2006 curr_occr->block_num = bb_num;
2007 tmp_occr = extension_expr->avail_occr;
2009 extension_expr->avail_occr = curr_occr;
2012 while (tmp_occr->next)
2013 tmp_occr = tmp_occr->next;
2014 tmp_occr->next = curr_occr;
2017 /* Note: there is no need to reset the killed bit since it must be zero at
2020 else if (curr_prop->first_se_after_last_def == DF_INSN_LUID (df, ref))
2022 /* Set the available bit. */
2023 SET_BIT (comp[bb_num], indx);
2024 /* Reset the killed bit. */
2025 RESET_BIT (ae_kill[bb_num], indx);
2026 /* Record the available occurrence. */
2027 curr_occr = xmalloc (sizeof (struct see_occr));
2028 curr_occr->next = NULL;
2029 curr_occr->insn = use_se;
2030 curr_occr->block_num = bb_num;
2031 tmp_occr = extension_expr->avail_occr;
2033 extension_expr->avail_occr = curr_occr;
2036 while (tmp_occr->next)
2037 tmp_occr = tmp_occr->next;
2038 tmp_occr->next = curr_occr;
2045 /* Here we traverse over all the merged and unmerged extensions of the reference
2046 and analyze their properties for the LCM.
2048 This is a subroutine of see_execute_LCM called via splay_tree_foreach.
2050 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2051 see_ref_s structure. */
2054 see_analyze_ref_local_prop (splay_tree_node stn,
2055 void *data ATTRIBUTE_UNUSED)
2057 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
2058 htab_t unmerged_def_se_hash =
2059 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
2060 htab_t merged_def_se_hash =
2061 ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
2063 /* Analyze use extensions that were not merged with the reference. */
2065 htab_traverse_noresize (use_se_hash, see_analyze_use_local_prop,
2066 (PTR) (stn->value));
2068 /* Analyze def extensions that were not merged with the reference. */
2069 if (unmerged_def_se_hash)
2070 htab_traverse (unmerged_def_se_hash, see_analyze_unmerged_def_local_prop,
2071 (PTR) (stn->value));
2073 /* Analyze def extensions that were merged with the reference. */
2074 if (merged_def_se_hash)
2075 htab_traverse (merged_def_se_hash, see_analyze_merged_def_local_prop,
2076 (PTR) (stn->value));
2078 /* Continue to the next definition. */
2083 /* Phase 3 top level function.
2084 In this phase, we set the input bit vectors of the LCM according to data
2085 gathered in phase 2.
2086 Then we run the edge based LCM. */
2089 see_execute_LCM (void)
2091 size_t pre_extension_num = htab_elements (see_pre_extension_hash);
2096 "* Phase 3: Eliminate globally redundant extensions. *\n");
2098 /* Initialize the global sbitmap vectors. */
2099 transp = sbitmap_vector_alloc (last_bb, pre_extension_num);
2100 comp = sbitmap_vector_alloc (last_bb, pre_extension_num);
2101 antloc = sbitmap_vector_alloc (last_bb, pre_extension_num);
2102 ae_kill = sbitmap_vector_alloc (last_bb, pre_extension_num);
2103 sbitmap_vector_ones (transp, last_bb);
2104 sbitmap_vector_zero (comp, last_bb);
2105 sbitmap_vector_zero (antloc, last_bb);
2106 sbitmap_vector_zero (ae_kill, last_bb);
2108 /* Traverse over all the splay trees of the basic blocks. */
2109 for (i = 0; i < last_bb; i++)
2111 if (see_bb_splay_ar[i])
2113 /* Traverse over all the references in the basic block in forward
2115 splay_tree_foreach (see_bb_splay_ar[i],
2116 see_analyze_ref_local_prop, NULL);
2120 /* Add fake exit edges before running the lcm. */
2121 add_noreturn_fake_exit_edges ();
2124 edge_list = pre_edge_lcm (pre_extension_num, transp, comp, antloc,
2125 ae_kill, &pre_insert_map, &pre_delete_map);
2127 /* Remove the fake edges. */
2128 remove_fake_exit_edges ();
2132 /* Phase 2 implementation: Merge and eliminate locally redundant extensions. */
2134 /* In this function we set the register properties for the register that is
2135 defined and extended in the reference.
2136 The properties are defined in see_register_properties structure which is
2137 allocated per basic bloack and per register.
2138 Later the extension is inserted into the see_pre_extension_hash for the next
2139 phase of the optimization.
2141 This is a subroutine of see_handle_extensions_for_one_ref called
2144 SLOT contains the current def extension instruction.
2145 B is the see_ref_s structure pointer. */
2148 see_set_prop_merged_def (void **slot, void *b)
2151 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2152 rtx insn = curr_ref_s->insn;
2153 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2154 htab_t curr_bb_hash;
2155 struct see_register_properties *curr_prop = NULL;
2156 struct see_register_properties **slot_prop;
2157 struct see_register_properties temp_prop;
2158 int ref_luid = DF_INSN_LUID (df, insn);
2160 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2163 /* The hash doesn't exist yet. Create it. */
2164 curr_bb_hash = htab_create (10,
2165 hash_descriptor_properties,
2166 eq_descriptor_properties,
2167 hash_del_properties);
2168 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2171 /* Find the right register properties in the right basic block. */
2172 temp_prop.regno = REGNO (dest_extension_reg);
2174 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2175 &temp_prop, INSERT);
2177 if (slot_prop && *slot_prop != NULL)
2179 /* Property already exists. */
2180 curr_prop = *slot_prop;
2181 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2183 curr_prop->last_def = ref_luid;
2184 curr_prop->first_se_after_last_def = ref_luid;
2188 /* Property doesn't exist yet. */
2189 curr_prop = xmalloc (sizeof (struct see_register_properties));
2190 curr_prop->regno = REGNO (dest_extension_reg);
2191 curr_prop->last_def = ref_luid;
2192 curr_prop->first_se_before_any_def = -1;
2193 curr_prop->first_se_after_last_def = ref_luid;
2194 *slot_prop = curr_prop;
2197 /* Insert the def_se into see_pre_extension_hash if it isn't already
2199 see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
2205 /* In this function we set the register properties for the register that is
2206 defined but not extended in the reference.
2207 The properties are defined in see_register_properties structure which is
2208 allocated per basic bloack and per register.
2209 Later the extension is inserted into the see_pre_extension_hash for the next
2210 phase of the optimization.
2212 This is a subroutine of see_handle_extensions_for_one_ref called
2215 SLOT contains the current def extension instruction.
2216 B is the see_ref_s structure pointer. */
2219 see_set_prop_unmerged_def (void **slot, void *b)
2222 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2223 rtx insn = curr_ref_s->insn;
2224 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2225 htab_t curr_bb_hash;
2226 struct see_register_properties *curr_prop = NULL;
2227 struct see_register_properties **slot_prop;
2228 struct see_register_properties temp_prop;
2229 int ref_luid = DF_INSN_LUID (df, insn);
2231 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2234 /* The hash doesn't exist yet. Create it. */
2235 curr_bb_hash = htab_create (10,
2236 hash_descriptor_properties,
2237 eq_descriptor_properties,
2238 hash_del_properties);
2239 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2242 /* Find the right register properties in the right basic block. */
2243 temp_prop.regno = REGNO (dest_extension_reg);
2245 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2246 &temp_prop, INSERT);
2248 if (slot_prop && *slot_prop != NULL)
2250 /* Property already exists. */
2251 curr_prop = *slot_prop;
2252 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2254 curr_prop->last_def = ref_luid;
2255 curr_prop->first_se_after_last_def = -1;
2259 /* Property doesn't exist yet. */
2260 curr_prop = xmalloc (sizeof (struct see_register_properties));
2261 curr_prop->regno = REGNO (dest_extension_reg);
2262 curr_prop->last_def = ref_luid;
2263 curr_prop->first_se_before_any_def = -1;
2264 curr_prop->first_se_after_last_def = -1;
2265 *slot_prop = curr_prop;
2268 /* Insert the def_se into see_pre_extension_hash if it isn't already
2270 see_seek_pre_extension_expr (def_se, DEF_EXTENSION);
2276 /* In this function we set the register properties for the register that is used
2278 The properties are defined in see_register_properties structure which is
2279 allocated per basic bloack and per register.
2280 When a redundant use extension is found it is removed from the hash of the
2282 If the extension is non redundant it is inserted into the
2283 see_pre_extension_hash for the next phase of the optimization.
2285 This is a subroutine of see_handle_extensions_for_one_ref called
2288 SLOT contains the current use extension instruction.
2289 B is the see_ref_s structure pointer. */
2292 see_set_prop_unmerged_use (void **slot, void *b)
2295 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2296 rtx insn = curr_ref_s->insn;
2297 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
2298 htab_t curr_bb_hash;
2299 struct see_register_properties *curr_prop = NULL;
2300 struct see_register_properties **slot_prop;
2301 struct see_register_properties temp_prop;
2302 bool locally_redundant = false;
2303 int ref_luid = DF_INSN_LUID (df, insn);
2305 curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)];
2308 /* The hash doesn't exist yet. Create it. */
2309 curr_bb_hash = htab_create (10,
2310 hash_descriptor_properties,
2311 eq_descriptor_properties,
2312 hash_del_properties);
2313 see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash;
2316 /* Find the right register properties in the right basic block. */
2317 temp_prop.regno = REGNO (dest_extension_reg);
2319 (struct see_register_properties **) htab_find_slot (curr_bb_hash,
2320 &temp_prop, INSERT);
2322 if (slot_prop && *slot_prop != NULL)
2324 /* Property already exists. */
2325 curr_prop = *slot_prop;
2326 gcc_assert (curr_prop->regno == REGNO (dest_extension_reg));
2329 if (curr_prop->last_def < 0 && curr_prop->first_se_before_any_def < 0)
2330 curr_prop->first_se_before_any_def = ref_luid;
2331 else if (curr_prop->last_def < 0
2332 && curr_prop->first_se_before_any_def >= 0)
2334 /* In this case the extension is localy redundant. */
2335 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2336 locally_redundant = true;
2338 else if (curr_prop->last_def >= 0
2339 && curr_prop->first_se_after_last_def < 0)
2340 curr_prop->first_se_after_last_def = ref_luid;
2341 else if (curr_prop->last_def >= 0
2342 && curr_prop->first_se_after_last_def >= 0)
2344 /* In this case the extension is localy redundant. */
2345 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2346 locally_redundant = true;
2353 /* Property doesn't exist yet. Create a new one. */
2354 curr_prop = xmalloc (sizeof (struct see_register_properties));
2355 curr_prop->regno = REGNO (dest_extension_reg);
2356 curr_prop->last_def = -1;
2357 curr_prop->first_se_before_any_def = ref_luid;
2358 curr_prop->first_se_after_last_def = -1;
2359 *slot_prop = curr_prop;
2362 /* Insert the use_se into see_pre_extension_hash if it isn't already
2364 if (!locally_redundant)
2365 see_seek_pre_extension_expr (use_se, USE_EXTENSION);
2366 if (locally_redundant && dump_file)
2368 fprintf (dump_file, "Locally redundant extension:\n");
2369 print_rtl_single (dump_file, use_se);
2375 /* Print an extension instruction.
2377 This is a subroutine of see_handle_extensions_for_one_ref called
2379 SLOT contains the extension instruction. */
2382 see_print_one_extension (void **slot, void *b ATTRIBUTE_UNUSED)
2386 gcc_assert (def_se && INSN_P (def_se));
2387 print_rtl_single (dump_file, def_se);
2392 /* Function called by note_uses to replace used subexpressions.
2394 X is a pointer to the subexpression and DATA is a pointer to a
2395 see_replace_data structure that contains the data for the replacement. */
2398 see_replace_src (rtx *x, void *data)
2400 struct see_replace_data *d
2401 = (struct see_replace_data *) data;
2403 *x = replace_rtx (*x, d->from, d->to);
2407 /* At this point the pattern is expected to be:
2409 ref: set (dest_reg) (rhs)
2410 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2412 The merge of these two instructions didn't succeed.
2414 We try to generate the pattern:
2415 set (subreg (dest_extension_reg)) (rhs)
2417 We do this in 4 steps:
2418 a. Replace every use of dest_reg with a new pseudo register.
2419 b. Replace every instance of dest_reg with the subreg.
2420 c. Replace every use of the new pseudo register back to dest_reg.
2421 d. Try to recognize and simplify.
2423 If the manipulation failed, leave the original ref but try to generate and
2424 recognize a simple move instruction:
2425 set (subreg (dest_extension_reg)) (dest_reg)
2426 This move instruction will be emitted right after the ref to the instruction
2427 stream and assure the correctness of the code after def_se will be removed.
2429 CURR_REF_S is the current reference.
2430 DEF_SE is the extension that couldn't be merged. */
2433 see_def_extension_not_merged (struct see_ref_s *curr_ref_s, rtx def_se)
2435 struct see_replace_data d;
2436 /* If the original insn was already merged with an extension before,
2437 take the merged one. */
2438 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn :
2440 rtx merged_ref_next = (curr_ref_s->merged_insn) ?
2441 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX;
2442 rtx ref_copy = copy_rtx (ref);
2443 rtx source_extension_reg = see_get_extension_reg (def_se, 0);
2444 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2445 rtx move_insn = NULL;
2447 rtx dest_reg, dest_real_reg;
2448 rtx new_pseudo_reg, subreg;
2449 enum machine_mode source_extension_mode = GET_MODE (source_extension_reg);
2450 enum machine_mode dest_mode;
2452 set = single_set (def_se);
2454 rhs = SET_SRC (set);
2455 gcc_assert (GET_CODE (rhs) == SIGN_EXTEND
2456 || GET_CODE (rhs) == ZERO_EXTEND);
2457 dest_reg = XEXP (rhs, 0);
2458 gcc_assert (REG_P (dest_reg)
2459 || (GET_CODE (dest_reg) == SUBREG
2460 && REG_P (SUBREG_REG (dest_reg))));
2461 dest_real_reg = REG_P (dest_reg) ? dest_reg : SUBREG_REG (dest_reg);
2462 dest_mode = GET_MODE (dest_reg);
2464 subreg = gen_lowpart_SUBREG (dest_mode, dest_extension_reg);
2465 new_pseudo_reg = gen_reg_rtx (source_extension_mode);
2467 /* Step a: Replace every use of dest_real_reg with a new pseudo register. */
2468 d.from = dest_real_reg;
2469 d.to = new_pseudo_reg;
2470 note_uses (&PATTERN (ref_copy), see_replace_src, &d);
2471 /* Step b: Replace every instance of dest_reg with the subreg. */
2472 ref_copy = replace_rtx (ref_copy, dest_reg, subreg);
2474 /* Step c: Replace every use of the new pseudo register back to
2476 d.from = new_pseudo_reg;
2477 d.to = dest_real_reg;
2478 note_uses (&PATTERN (ref_copy), see_replace_src, &d);
2480 if (rtx_equal_p (PATTERN (ref), PATTERN (ref_copy))
2481 || insn_invalid_p (ref_copy))
2483 /* The manipulation failed. */
2485 /* Create a new copy. */
2486 ref_copy = copy_rtx (ref);
2488 /* Create a simple move instruction that will replace the def_se. */
2490 emit_move_insn (subreg, dest_reg);
2491 move_insn = get_insns ();
2494 /* Link the manipulated instruction to the newly created move instruction
2495 and to the former created move instructions. */
2496 PREV_INSN (ref_copy) = NULL_RTX;
2497 NEXT_INSN (ref_copy) = move_insn;
2498 PREV_INSN (move_insn) = ref_copy;
2499 NEXT_INSN (move_insn) = merged_ref_next;
2500 if (merged_ref_next != NULL_RTX)
2501 PREV_INSN (merged_ref_next) = move_insn;
2502 curr_ref_s->merged_insn = ref_copy;
2506 fprintf (dump_file, "Following def merge failure a move ");
2507 fprintf (dump_file, "insn was added after the ref.\n");
2508 fprintf (dump_file, "Original ref:\n");
2509 print_rtl_single (dump_file, ref);
2510 fprintf (dump_file, "Move insn that was added:\n");
2511 print_rtl_single (dump_file, move_insn);
2516 /* The manipulation succeeded. Store the new manipulated reference. */
2518 /* Try to simplify the new manipulated insn. */
2519 validate_simplify_insn (ref_copy);
2521 /* Create a simple move instruction to assure the correctness of the code. */
2523 emit_move_insn (dest_reg, subreg);
2524 move_insn = get_insns ();
2527 /* Link the manipulated instruction to the newly created move instruction and
2528 to the former created move instructions. */
2529 PREV_INSN (ref_copy) = NULL_RTX;
2530 NEXT_INSN (ref_copy) = move_insn;
2531 PREV_INSN (move_insn) = ref_copy;
2532 NEXT_INSN (move_insn) = merged_ref_next;
2533 if (merged_ref_next != NULL_RTX)
2534 PREV_INSN (merged_ref_next) = move_insn;
2535 curr_ref_s->merged_insn = ref_copy;
2539 fprintf (dump_file, "Following merge failure the ref was transformed!\n");
2540 fprintf (dump_file, "Original ref:\n");
2541 print_rtl_single (dump_file, ref);
2542 fprintf (dump_file, "Transformed ref:\n");
2543 print_rtl_single (dump_file, ref_copy);
2544 fprintf (dump_file, "Move insn that was added:\n");
2545 print_rtl_single (dump_file, move_insn);
2550 /* Merge the reference instruction (ref) with the current use extension.
2552 use_se extends a NARROWmode register to a WIDEmode register.
2553 ref uses the WIDEmode register.
2555 The pattern we try to merge is this:
2556 use_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2557 ref: use (dest_extension_reg)
2559 where dest_extension_reg and source_extension_reg can be subregs.
2561 The merge is done by generating, simplifying and recognizing the pattern:
2562 use (sign/zero_extend (source_extension_reg))
2564 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2565 we don't try to merge it with use_se and we continue as if the merge failed.
2567 This is a subroutine of see_handle_extensions_for_one_ref called
2569 SLOT contains the current use extension instruction.
2570 B is the see_ref_s structure pointer. */
2573 see_merge_one_use_extension (void **slot, void *b)
2575 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2577 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn :
2579 rtx merged_ref_next = (curr_ref_s->merged_insn) ?
2580 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX;
2581 rtx ref_copy = copy_rtx (ref);
2582 rtx extension_set = single_set (use_se);
2583 rtx extension_rhs = NULL;
2584 rtx dest_extension_reg = see_get_extension_reg (use_se, 1);
2586 rtx simplified_note = NULL;
2588 gcc_assert (use_se && curr_ref_s && extension_set);
2590 extension_rhs = SET_SRC (extension_set);
2592 /* In REG_EQUIV and REG_EQUAL notes that mention the register we need to
2593 replace the uses of the dest_extension_reg with the rhs of the extension
2594 instruction. This is necessary since there might not be an extension in
2595 the path between the definition and the note when this optimization is
2597 note = find_reg_equal_equiv_note (ref_copy);
2600 simplified_note = simplify_replace_rtx (XEXP (note, 0),
2603 if (rtx_equal_p (XEXP (note, 0), simplified_note))
2604 /* Replacement failed. Remove the note. */
2605 remove_note (ref_copy, note);
2607 XEXP (note, 0) = simplified_note;
2610 if (!see_want_to_be_merged_with_extension (ref, use_se, USE_EXTENSION))
2612 /* The use in the reference is too simple. Don't try to merge. */
2615 fprintf (dump_file, "Use merge skipped!\n");
2616 fprintf (dump_file, "Original instructions:\n");
2617 print_rtl_single (dump_file, use_se);
2618 print_rtl_single (dump_file, ref);
2620 /* Don't remove the current use_se from the use_se_hash and continue to
2621 the next extension. */
2625 validate_replace_src_group (dest_extension_reg, extension_rhs, ref_copy);
2627 if (!num_changes_pending ())
2628 /* In this case this is not a real use (the only use is/was in the notes
2629 list). Remove the use extension from the hash. This will prevent it
2630 from been emitted in the first place. */
2634 fprintf (dump_file, "Use extension not necessary before:\n");
2635 print_rtl_single (dump_file, ref);
2637 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2638 PREV_INSN (ref_copy) = NULL_RTX;
2639 NEXT_INSN (ref_copy) = merged_ref_next;
2640 if (merged_ref_next != NULL_RTX)
2641 PREV_INSN (merged_ref_next) = ref_copy;
2642 curr_ref_s->merged_insn = ref_copy;
2646 if (!apply_change_group ())
2648 /* The merge failed. */
2651 fprintf (dump_file, "Use merge failed!\n");
2652 fprintf (dump_file, "Original instructions:\n");
2653 print_rtl_single (dump_file, use_se);
2654 print_rtl_single (dump_file, ref);
2656 /* Don't remove the current use_se from the use_se_hash and continue to
2657 the next extension. */
2661 /* The merge succeeded! */
2663 /* Try to simplify the new merged insn. */
2664 validate_simplify_insn (ref_copy);
2666 PREV_INSN (ref_copy) = NULL_RTX;
2667 NEXT_INSN (ref_copy) = merged_ref_next;
2668 if (merged_ref_next != NULL_RTX)
2669 PREV_INSN (merged_ref_next) = ref_copy;
2670 curr_ref_s->merged_insn = ref_copy;
2674 fprintf (dump_file, "Use merge succeeded!\n");
2675 fprintf (dump_file, "Original instructions:\n");
2676 print_rtl_single (dump_file, use_se);
2677 print_rtl_single (dump_file, ref);
2678 fprintf (dump_file, "Merged instruction:\n");
2679 print_rtl_single (dump_file, ref_copy);
2682 /* Remove the current use_se from the use_se_hash. This will prevent it from
2683 been emitted in the first place. */
2684 htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot);
2689 /* Merge the reference instruction (ref) with the extension that follows it
2690 in the same basic block (def_se).
2691 ref sets a NARROWmode register and def_se extends it to WIDEmode register.
2693 The pattern we try to merge is this:
2694 ref: set (dest_reg) (rhs)
2695 def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg))
2697 where dest_reg and source_extension_reg can both be subregs (togather)
2698 and (REGNO (dest_reg) == REGNO (source_extension_reg))
2700 The merge is done by generating, simplifying and recognizing the pattern:
2701 set (dest_extension_reg) (sign/zero_extend (rhs))
2702 If ref is a parallel instruction we just replace the relevant set in it.
2704 If ref is too simple (according to see_want_to_be_merged_with_extension ())
2705 we don't try to merge it with def_se and we continue as if the merge failed.
2707 This is a subroutine of see_handle_extensions_for_one_ref called
2710 SLOT contains the current def extension instruction.
2711 B is the see_ref_s structure pointer. */
2714 see_merge_one_def_extension (void **slot, void *b)
2716 struct see_ref_s *curr_ref_s = (struct see_ref_s *) b;
2718 /* If the original insn was already merged with an extension before,
2719 take the merged one. */
2720 rtx ref = (curr_ref_s->merged_insn) ? curr_ref_s->merged_insn :
2722 rtx merged_ref_next = (curr_ref_s->merged_insn) ?
2723 NEXT_INSN (curr_ref_s->merged_insn): NULL_RTX;
2724 rtx ref_copy = copy_rtx (ref);
2726 rtx source_extension_reg = see_get_extension_reg (def_se, 0);
2727 rtx dest_extension_reg = see_get_extension_reg (def_se, 1);
2728 rtx move_insn, *rtx_slot, subreg;
2729 rtx temp_extension = NULL;
2730 rtx simplified_temp_extension = NULL;
2733 enum rtx_code extension_code;
2734 enum machine_mode source_extension_mode;
2735 enum machine_mode source_mode;
2736 enum machine_mode dest_extension_mode;
2737 bool merge_success = false;
2746 if (!see_want_to_be_merged_with_extension (ref, def_se, DEF_EXTENSION))
2748 /* The definition in the reference is too simple. Don't try to merge. */
2751 fprintf (dump_file, "Def merge skipped!\n");
2752 fprintf (dump_file, "Original instructions:\n");
2753 print_rtl_single (dump_file, ref);
2754 print_rtl_single (dump_file, def_se);
2757 see_def_extension_not_merged (curr_ref_s, def_se);
2758 /* Continue to the next extension. */
2762 extension_code = see_get_extension_data (def_se, &source_mode);
2764 /* Try to merge and simplify the extension. */
2765 source_extension_mode = GET_MODE (source_extension_reg);
2766 dest_extension_mode = GET_MODE (dest_extension_reg);
2768 pat = &PATTERN (ref_copy);
2769 code = GET_CODE (*pat);
2771 if (code == PARALLEL)
2773 bool need_to_apply_change = false;
2775 for (i = 0; i < XVECLEN (*pat, 0); i++)
2777 rtx *sub = &XVECEXP (*pat, 0, i);
2779 if (GET_CODE (*sub) == SET
2780 && GET_MODE (SET_SRC (*sub)) != VOIDmode
2781 && GET_MODE (SET_DEST (*sub)) == source_mode
2782 && ((REG_P (SET_DEST (*sub))
2783 && REGNO (SET_DEST (*sub)) == REGNO (source_extension_reg))
2784 || (GET_CODE (SET_DEST (*sub)) == SUBREG
2785 && REG_P (SUBREG_REG (SET_DEST (*sub)))
2786 && (REGNO (SUBREG_REG (SET_DEST (*sub))) ==
2787 REGNO (source_extension_reg)))))
2789 rtx orig_src = SET_SRC (*sub);
2791 if (extension_code == SIGN_EXTEND)
2792 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode,
2795 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode,
2797 simplified_temp_extension = simplify_rtx (temp_extension);
2799 (simplified_temp_extension) ? simplified_temp_extension :
2801 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg,
2803 validate_change (ref_copy, sub, new_set, 1);
2804 need_to_apply_change = true;
2807 if (need_to_apply_change)
2808 if (apply_change_group ())
2809 merge_success = true;
2811 else if (code == SET
2812 && GET_MODE (SET_SRC (*pat)) != VOIDmode
2813 && GET_MODE (SET_DEST (*pat)) == source_mode
2814 && ((REG_P (SET_DEST (*pat))
2815 && REGNO (SET_DEST (*pat)) == REGNO (source_extension_reg))
2816 || (GET_CODE (SET_DEST (*pat)) == SUBREG
2817 && REG_P (SUBREG_REG (SET_DEST (*pat)))
2818 && (REGNO (SUBREG_REG (SET_DEST (*pat))) ==
2819 REGNO (source_extension_reg)))))
2821 rtx orig_src = SET_SRC (*pat);
2823 if (extension_code == SIGN_EXTEND)
2824 temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode, orig_src);
2826 temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode, orig_src);
2827 simplified_temp_extension = simplify_rtx (temp_extension);
2828 temp_extension = (simplified_temp_extension) ? simplified_temp_extension :
2830 new_set = gen_rtx_SET (VOIDmode, dest_extension_reg, temp_extension);
2831 if (validate_change (ref_copy, pat, new_set, 0))
2832 merge_success = true;
2836 /* The merge failed. */
2839 fprintf (dump_file, "Def merge failed!\n");
2840 fprintf (dump_file, "Original instructions:\n");
2841 print_rtl_single (dump_file, ref);
2842 print_rtl_single (dump_file, def_se);
2845 see_def_extension_not_merged (curr_ref_s, def_se);
2846 /* Continue to the next extension. */
2850 /* The merge succeeded! */
2852 /* Create a simple move instruction to assure the correctness of the code. */
2853 subreg = gen_lowpart_SUBREG (source_extension_mode, dest_extension_reg);
2855 emit_move_insn (source_extension_reg, subreg);
2856 move_insn = get_insns ();
2859 /* Link the merged instruction to the newly created move instruction and
2860 to the former created move instructions. */
2861 PREV_INSN (ref_copy) = NULL_RTX;
2862 NEXT_INSN (ref_copy) = move_insn;
2863 PREV_INSN (move_insn) = ref_copy;
2864 NEXT_INSN (move_insn) = merged_ref_next;
2865 if (merged_ref_next != NULL_RTX)
2866 PREV_INSN (merged_ref_next) = move_insn;
2867 curr_ref_s->merged_insn = ref_copy;
2871 fprintf (dump_file, "Def merge succeeded!\n");
2872 fprintf (dump_file, "Original instructions:\n");
2873 print_rtl_single (dump_file, ref);
2874 print_rtl_single (dump_file, def_se);
2875 fprintf (dump_file, "Merged instruction:\n");
2876 print_rtl_single (dump_file, ref_copy);
2877 fprintf (dump_file, "Move instruction that was added:\n");
2878 print_rtl_single (dump_file, move_insn);
2881 /* Remove the current def_se from the unmerged_def_se_hash and insert it to
2882 the merged_def_se_hash. */
2883 htab_clear_slot (curr_ref_s->unmerged_def_se_hash, (PTR *)slot);
2884 if (!curr_ref_s->merged_def_se_hash)
2885 curr_ref_s->merged_def_se_hash = htab_create (10,
2886 hash_descriptor_extension,
2887 eq_descriptor_extension,
2889 rtx_slot = (rtx *) htab_find_slot (curr_ref_s->merged_def_se_hash,
2890 dest_extension_reg, INSERT);
2891 gcc_assert (*rtx_slot == NULL);
2898 /* Try to eliminate extensions in this order:
2899 a. Try to merge only the def extensions, one by one.
2900 b. Try to merge only the use extensions, one by one.
2903 Try to merge any couple of use extensions simultaneously.
2904 Try to merge any def extension with one or two uses extensions
2907 After all the merges are done, update the register properties for the basic
2908 block and eliminate locally redundant use extensions.
2910 This is a subroutine of see_merge_and_eliminate_extensions called
2911 via splay_tree_foreach.
2912 STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a
2913 see_ref_s structure. */
2916 see_handle_extensions_for_one_ref (splay_tree_node stn,
2917 void *data ATTRIBUTE_UNUSED)
2919 htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
2920 htab_t unmerged_def_se_hash =
2921 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
2922 htab_t merged_def_se_hash;
2923 rtx ref = ((struct see_ref_s *) (stn->value))->insn;
2927 fprintf (dump_file, "Handling ref:\n");
2928 print_rtl_single (dump_file, ref);
2931 /* a. Try to eliminate only def extensions, one by one. */
2932 if (unmerged_def_se_hash)
2933 htab_traverse_noresize (unmerged_def_se_hash, see_merge_one_def_extension,
2934 (PTR) (stn->value));
2937 /* b. Try to eliminate only use extensions, one by one. */
2938 htab_traverse_noresize (use_se_hash, see_merge_one_use_extension,
2939 (PTR) (stn->value));
2941 merged_def_se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
2945 fprintf (dump_file, "The hashes of the current reference:\n");
2946 if (unmerged_def_se_hash)
2948 fprintf (dump_file, "unmerged_def_se_hash:\n");
2949 htab_traverse (unmerged_def_se_hash, see_print_one_extension, NULL);
2951 if (merged_def_se_hash)
2953 fprintf (dump_file, "merged_def_se_hash:\n");
2954 htab_traverse (merged_def_se_hash, see_print_one_extension, NULL);
2958 fprintf (dump_file, "use_se_hash:\n");
2959 htab_traverse (use_se_hash, see_print_one_extension, NULL);
2963 /* Now that all the merges are done, update the register properties of the
2964 basic block and eliminate locally redundant extensions.
2965 It is important that we first traverse the use extensions hash and
2966 afterwards the def extensions hashes. */
2969 htab_traverse_noresize (use_se_hash, see_set_prop_unmerged_use,
2970 (PTR) (stn->value));
2972 if (unmerged_def_se_hash)
2973 htab_traverse (unmerged_def_se_hash, see_set_prop_unmerged_def,
2974 (PTR) (stn->value));
2976 if (merged_def_se_hash)
2977 htab_traverse (merged_def_se_hash, see_set_prop_merged_def,
2978 (PTR) (stn->value));
2980 /* Continue to the next definition. */
2985 /* Phase 2 top level function.
2986 In this phase, we try to merge def extensions and use extensions with their
2987 references, and eliminate redundant extensions in the same basic block.
2988 We also gather information for the next phases. */
2991 see_merge_and_eliminate_extensions (void)
2997 "* Phase 2: Merge and eliminate locally redundant extensions. *\n");
2999 /* Traverse over all the splay trees of the basic blocks. */
3000 for (i = 0; i < last_bb; i++)
3002 if (see_bb_splay_ar[i])
3005 fprintf (dump_file, "Handling references for bb %d\n", i);
3006 /* Traverse over all the references in the basic block in forward
3008 splay_tree_foreach (see_bb_splay_ar[i],
3009 see_handle_extensions_for_one_ref, NULL);
3015 /* Phase 1 implementation: Propagate extensions to uses. */
3017 /* Insert REF_INSN into the splay tree of its basic block.
3018 SE_INSN is the extension to store in the proper hash according to TYPE.
3020 Return true if everything went well.
3021 Otherwise, return false (this will cause the optimization to be aborted). */
3024 see_store_reference_and_extension (rtx ref_insn, rtx se_insn,
3025 enum extension_type type)
3029 splay_tree_node stn = NULL;
3030 htab_t se_hash = NULL;
3031 struct see_ref_s *ref_s = NULL;
3033 /* Check the arguments. */
3034 gcc_assert (ref_insn && se_insn);
3035 if (!see_bb_splay_ar)
3038 curr_bb_num = BLOCK_NUM (ref_insn);
3039 gcc_assert (curr_bb_num < last_bb && curr_bb_num >= 0);
3041 /* Insert the reference to the splay tree of its basic block. */
3042 if (!see_bb_splay_ar[curr_bb_num])
3043 /* The splay tree for this block doesn't exist yet, create it. */
3044 see_bb_splay_ar[curr_bb_num] = splay_tree_new (splay_tree_compare_ints,
3045 NULL, see_free_ref_s);
3047 /* Splay tree already exists, check if the current reference is already
3050 stn = splay_tree_lookup (see_bb_splay_ar[curr_bb_num],
3051 DF_INSN_LUID (df, ref_insn));
3055 case EXPLICIT_DEF_EXTENSION:
3057 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash;
3060 se_hash = htab_create (10,
3061 hash_descriptor_extension,
3062 eq_descriptor_extension,
3064 ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash =
3068 case IMPLICIT_DEF_EXTENSION:
3069 se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash;
3072 se_hash = htab_create (10,
3073 hash_descriptor_extension,
3074 eq_descriptor_extension,
3076 ((struct see_ref_s *) (stn->value))->merged_def_se_hash =
3081 se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash;
3084 se_hash = htab_create (10,
3085 hash_descriptor_extension,
3086 eq_descriptor_extension,
3088 ((struct see_ref_s *) (stn->value))->use_se_hash = se_hash;
3096 /* Initialize a new see_ref_s structure and insert it to the splay
3100 ref_s = xmalloc (sizeof (struct see_ref_s));
3101 ref_s->luid = DF_INSN_LUID (df, ref_insn);
3102 ref_s->insn = ref_insn;
3103 ref_s->merged_insn = NULL;
3105 /* Initialize the hashes. */
3108 case EXPLICIT_DEF_EXTENSION:
3109 ref_s->unmerged_def_se_hash = htab_create (10,
3110 hash_descriptor_extension,
3111 eq_descriptor_extension,
3113 se_hash = ref_s->unmerged_def_se_hash;
3114 ref_s->merged_def_se_hash = NULL;
3115 ref_s->use_se_hash = NULL;
3117 case IMPLICIT_DEF_EXTENSION:
3118 ref_s->merged_def_se_hash = htab_create (10,
3119 hash_descriptor_extension,
3120 eq_descriptor_extension,
3122 se_hash = ref_s->merged_def_se_hash;
3123 ref_s->unmerged_def_se_hash = NULL;
3124 ref_s->use_se_hash = NULL;
3127 ref_s->use_se_hash = htab_create (10,
3128 hash_descriptor_extension,
3129 eq_descriptor_extension,
3131 se_hash = ref_s->use_se_hash;
3132 ref_s->unmerged_def_se_hash = NULL;
3133 ref_s->merged_def_se_hash = NULL;
3140 /* Insert the new extension instruction into the correct se_hash of the
3141 current reference. */
3142 rtx_slot = (rtx *) htab_find_slot (se_hash, se_insn, INSERT);
3143 if (*rtx_slot != NULL)
3145 gcc_assert (type == USE_EXTENSION);
3146 gcc_assert (rtx_equal_p (PATTERN (*rtx_slot), PATTERN (se_insn)));
3149 *rtx_slot = se_insn;
3151 /* If this is a new reference, insert it into the splay_tree. */
3153 splay_tree_insert (see_bb_splay_ar[curr_bb_num],
3154 DF_INSN_LUID (df, ref_insn), (splay_tree_value) ref_s);
3159 /* Go over all the defs, for each relevant definition (defined below) store its
3160 instruction as a reference.
3162 A definition is relevant if its root has
3163 ((entry_type == SIGN_EXTENDED_DEF) || (entry_type == ZERO_EXTENDED_DEF)) and
3164 his source_mode is not narrower then the the roots source_mode.
3166 Return the number of relevant defs or negative number if something bad had
3167 happened and the optimization should be aborted. */
3170 see_handle_relevant_defs (void)
3175 rtx ref_insn = NULL;
3176 struct web_entry *root_entry = NULL;
3178 int num_relevant_defs = 0;
3179 enum rtx_code extension_code;
3181 for (i = 0; i < defs_num; i++)
3183 insn = DF_REF_INSN (DF_DEFS_GET (df, i));
3184 reg = DF_REF_REAL_REG (DF_DEFS_GET (df, i));
3192 root_entry = unionfind_root (&def_entry[i]);
3194 if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF
3195 && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF)
3196 /* The current web is not relevant. Continue to the next def. */
3199 if (root_entry->reg)
3200 /* It isn't possible to have two different register for the same
3202 gcc_assert (rtx_equal_p (root_entry->reg, reg));
3204 root_entry->reg = reg;
3206 /* The current definition is an EXTENDED_DEF or a definition that its
3207 source_mode is narrower then its web's source_mode.
3208 This means that we need to generate the implicit extension explicitly
3209 and store it in the current reference's merged_def_se_hash. */
3210 if (ENTRY_EI (&def_entry[i])->local_relevancy == EXTENDED_DEF
3211 || (ENTRY_EI (&def_entry[i])->local_source_mode <
3212 ENTRY_EI (root_entry)->source_mode))
3214 num_relevant_defs++;
3216 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF)
3217 extension_code = SIGN_EXTEND;
3219 extension_code = ZERO_EXTEND;
3222 see_gen_normalized_extension (reg, extension_code,
3223 ENTRY_EI (root_entry)->source_mode);
3225 /* This is a dummy extension, mark it as deleted. */
3226 INSN_DELETED_P (se_insn) = 1;
3228 if (!see_store_reference_and_extension (insn, se_insn,
3229 IMPLICIT_DEF_EXTENSION))
3230 /* Something bad happened. Abort the optimization. */
3235 ref_insn = PREV_INSN (insn);
3236 gcc_assert (BLOCK_NUM (ref_insn) == BLOCK_NUM (insn));
3238 num_relevant_defs++;
3240 if (!see_store_reference_and_extension (ref_insn, insn,
3241 EXPLICIT_DEF_EXTENSION))
3242 /* Something bad happened. Abort the optimization. */
3245 return num_relevant_defs;
3249 /* Go over all the uses, for each use in relevant web store its instruction as
3250 a reference and generate an extension before it.
3252 Return the number of relevant uses or negative number if something bad had
3253 happened and the optimization should be aborted. */
3256 see_handle_relevant_uses (void)
3260 struct web_entry *root_entry = NULL;
3263 int num_relevant_uses = 0;
3264 enum rtx_code extension_code;
3266 for (i = 0; i < uses_num; i++)
3268 insn = DF_REF_INSN (DF_USES_GET (df, i));
3269 reg = DF_REF_REAL_REG (DF_USES_GET (df, i));
3277 root_entry = unionfind_root (&use_entry[i]);
3279 if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF
3280 && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF)
3281 /* The current web is not relevant. Continue to the next use. */
3284 if (root_entry->reg)
3285 /* It isn't possible to have two different register for the same
3287 gcc_assert (rtx_equal_p (root_entry->reg, reg));
3289 root_entry->reg = reg;
3291 /* Generate the use extension. */
3292 if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF)
3293 extension_code = SIGN_EXTEND;
3295 extension_code = ZERO_EXTEND;
3298 see_gen_normalized_extension (reg, extension_code,
3299 ENTRY_EI (root_entry)->source_mode);
3301 /* This is very bad, abort the transformation. */
3304 num_relevant_uses++;
3306 if (!see_store_reference_and_extension (insn, se_insn,
3308 /* Something bad happened. Abort the optimization. */
3312 return num_relevant_uses;
3316 /* Updates the relevancy of all the uses.
3317 The information of the i'th use is stored in use_entry[i].
3318 Currently all the uses are relevant for the optimization except for uses that
3319 are in LIBCALL or RETVAL instructions. */
3322 see_update_uses_relevancy (void)
3326 struct see_entry_extra_info *curr_entry_extra_info;
3330 if (!df || !use_entry)
3333 for (i = 0; i < uses_num; i++)
3336 insn = DF_REF_INSN (DF_USES_GET (df, i));
3337 reg = DF_REF_REAL_REG (DF_USES_GET (df, i));
3345 if (insn && find_reg_note (insn, REG_LIBCALL, NULL_RTX))
3347 if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
3355 fprintf (dump_file, "u%i insn %i reg %i ",
3356 i, (insn ? INSN_UID (insn) : -1), REGNO (reg));
3357 if (et == NOT_RELEVANT)
3358 fprintf (dump_file, "NOT RELEVANT \n");
3360 fprintf (dump_file, "RELEVANT USE \n");
3363 curr_entry_extra_info = xmalloc (sizeof (struct see_entry_extra_info));
3364 curr_entry_extra_info->relevancy = et;
3365 curr_entry_extra_info->local_relevancy = et;
3366 use_entry[i].extra_info = curr_entry_extra_info;
3367 use_entry[i].reg = NULL;
3368 use_entry[i].pred = NULL;
3373 /* A definition in a candidate for this optimization only if its pattern is
3374 recognized as relevant in this function.
3375 INSN is the instruction to be recognized.
3377 - If this is the pattern of a common sign extension after definition:
3378 PREV_INSN (INSN): def (reg:NARROWmode r)
3379 INSN: set ((reg:WIDEmode r')
3380 (sign_extend:WIDEmode (reg:NARROWmode r)))
3381 return SIGN_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3383 - If this is the pattern of a common zero extension after definition:
3384 PREV_INSN (INSN): def (reg:NARROWmode r)
3385 INSN: set ((reg:WIDEmode r')
3386 (zero_extend:WIDEmode (reg:NARROWmode r)))
3387 return ZERO_EXTENDED_DEF and set SOURCE_MODE to NARROWmode.
3392 INSN: set ((reg:WIDEmode r) (sign_extend:WIDEmode (...expr...)))
3393 return EXTENDED_DEF and set SOURCE_MODE to the mode of expr.
3396 INSN: set ((reg:WIDEmode r) (zero_extend:WIDEmode (...expr...)))
3397 return EXTENDED_DEF and set SOURCE_MODE_UNSIGNED to the mode of expr.
3400 INSN: set ((reg:WIDEmode r) (CONST_INT (...)))
3401 return EXTENDED_DEF and set SOURCE_MODE(_UNSIGNED) to the narrowest mode that
3402 is implicitly sign(zero) extended to WIDEmode in the INSN.
3404 - FIXME: Extensions that are not adjacent to their definition and EXTENDED_DEF
3405 that is part of a PARALLEL instruction are not handled.
3406 These restriction can be relaxed. */
3408 static enum entry_type
3409 see_analyze_one_def (rtx insn, enum machine_mode *source_mode,
3410 enum machine_mode *source_mode_unsigned)
3412 enum rtx_code extension_code;
3416 rtx source_register = NULL;
3417 rtx prev_insn = NULL;
3418 rtx next_insn = NULL;
3419 enum machine_mode mode;
3420 enum machine_mode next_source_mode;
3421 HOST_WIDE_INT val = 0;
3422 HOST_WIDE_INT val2 = 0;
3425 *source_mode = MAX_MACHINE_MODE;
3426 *source_mode_unsigned = MAX_MACHINE_MODE;
3429 return NOT_RELEVANT;
3432 return NOT_RELEVANT;
3434 extension_code = see_get_extension_data (insn, source_mode);
3435 switch (extension_code)
3439 source_register = see_get_extension_reg (insn, 0);
3440 /* FIXME: This restriction can be relaxed. The only thing that is
3441 important is that the reference would be inside the same basic block
3442 as the extension. */
3443 prev_insn = PREV_INSN (insn);
3444 if (!prev_insn || !INSN_P (prev_insn))
3445 return NOT_RELEVANT;
3447 if (!reg_set_between_p (source_register, PREV_INSN (prev_insn), insn))
3448 return NOT_RELEVANT;
3450 if (find_reg_note (prev_insn, REG_LIBCALL, NULL_RTX))
3451 return NOT_RELEVANT;
3453 if (find_reg_note (prev_insn, REG_RETVAL, NULL_RTX))
3454 return NOT_RELEVANT;
3456 /* If we can't use copy_rtx on the reference it can't be a reference. */
3457 if (GET_CODE (PATTERN (prev_insn)) == PARALLEL
3458 && asm_noperands (PATTERN (prev_insn)) >= 0)
3459 return NOT_RELEVANT;
3461 /* Now, check if this extension is a reference itself. If so, it is not
3462 relevant. Handling this extension as relevant would make things much
3463 more complicated. */
3464 next_insn = NEXT_INSN (insn);
3466 && INSN_P (prev_insn)
3467 && (see_get_extension_data (next_insn, &next_source_mode) !=
3470 rtx curr_dest_register = see_get_extension_reg (insn, 1);
3471 rtx next_source_register = see_get_extension_reg (next_insn, 0);
3473 if (REGNO (curr_dest_register) == REGNO (next_source_register))
3474 return NOT_RELEVANT;
3477 if (extension_code == SIGN_EXTEND)
3478 return SIGN_EXTENDED_DEF;
3480 return ZERO_EXTENDED_DEF;
3483 /* This may still be an EXTENDED_DEF. */
3485 /* FIXME: This restriction can be relaxed. It is possible to handle
3486 PARALLEL insns too. */
3487 set = single_set (insn);
3489 return NOT_RELEVANT;
3490 rhs = SET_SRC (set);
3491 lhs = SET_DEST (set);
3493 /* Don't handle extensions to something other then register or
3495 if (!REG_P (lhs) && !SUBREG_REG (lhs))
3496 return NOT_RELEVANT;
3498 switch (GET_CODE (rhs))
3501 *source_mode = GET_MODE (XEXP (rhs, 0));
3502 *source_mode_unsigned = MAX_MACHINE_MODE;
3503 return EXTENDED_DEF;
3505 *source_mode = MAX_MACHINE_MODE;
3506 *source_mode_unsigned = GET_MODE (XEXP (rhs, 0));
3507 return EXTENDED_DEF;
3512 /* Find the narrowest mode, val could fit into. */
3513 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT), i = 0;
3514 GET_MODE_BITSIZE (mode) < BITS_PER_WORD;
3515 mode = GET_MODE_WIDER_MODE (mode), i++)
3517 val2 = trunc_int_for_mode (val, mode);
3518 if (val2 == val && *source_mode == MAX_MACHINE_MODE)
3519 *source_mode = mode;
3520 if (val == (val & (HOST_WIDE_INT)GET_MODE_MASK (mode))
3521 && *source_mode_unsigned == MAX_MACHINE_MODE)
3522 *source_mode_unsigned = mode;
3523 if (*source_mode != MAX_MACHINE_MODE
3524 && *source_mode_unsigned !=MAX_MACHINE_MODE)
3525 return EXTENDED_DEF;
3527 if (*source_mode != MAX_MACHINE_MODE
3528 || *source_mode_unsigned !=MAX_MACHINE_MODE)
3529 return EXTENDED_DEF;
3530 return NOT_RELEVANT;
3532 return NOT_RELEVANT;
3540 /* Updates the relevancy and source_mode of all the definitions.
3541 The information of the i'th definition is stored in def_entry[i]. */
3544 see_update_defs_relevancy (void)
3546 struct see_entry_extra_info *curr_entry_extra_info;
3551 enum machine_mode source_mode;
3552 enum machine_mode source_mode_unsigned;
3554 if (!df || !def_entry)
3557 for (i = 0; i < defs_num; i++)
3559 insn = DF_REF_INSN (DF_DEFS_GET (df, i));
3560 reg = DF_REF_REAL_REG (DF_DEFS_GET (df, i));
3562 et = see_analyze_one_def (insn, &source_mode, &source_mode_unsigned);
3564 curr_entry_extra_info = xmalloc (sizeof (struct see_entry_extra_info));
3565 curr_entry_extra_info->relevancy = et;
3566 curr_entry_extra_info->local_relevancy = et;
3567 if (et != EXTENDED_DEF)
3569 curr_entry_extra_info->source_mode = source_mode;
3570 curr_entry_extra_info->local_source_mode = source_mode;
3574 curr_entry_extra_info->source_mode_signed = source_mode;
3575 curr_entry_extra_info->source_mode_unsigned = source_mode_unsigned;
3577 def_entry[i].extra_info = curr_entry_extra_info;
3578 def_entry[i].reg = NULL;
3579 def_entry[i].pred = NULL;
3583 if (et == NOT_RELEVANT)
3585 fprintf (dump_file, "d%i insn %i reg %i ",
3586 i, (insn ? INSN_UID (insn) : -1), REGNO (reg));
3587 fprintf (dump_file, "NOT RELEVANT \n");
3591 fprintf (dump_file, "d%i insn %i reg %i ",
3592 i ,INSN_UID (insn), REGNO (reg));
3593 fprintf (dump_file, "RELEVANT - ");
3596 case SIGN_EXTENDED_DEF :
3597 fprintf (dump_file, "SIGN_EXTENDED_DEF, source_mode = %s\n",
3598 GET_MODE_NAME (source_mode));
3600 case ZERO_EXTENDED_DEF :
3601 fprintf (dump_file, "ZERO_EXTENDED_DEF, source_mode = %s\n",
3602 GET_MODE_NAME (source_mode));
3605 fprintf (dump_file, "EXTENDED_DEF, ");
3606 if (source_mode != MAX_MACHINE_MODE
3607 && source_mode_unsigned != MAX_MACHINE_MODE)
3609 fprintf (dump_file, "positive const, ");
3610 fprintf (dump_file, "source_mode_signed = %s, ",
3611 GET_MODE_NAME (source_mode));
3612 fprintf (dump_file, "source_mode_unsigned = %s\n",
3613 GET_MODE_NAME (source_mode_unsigned));
3615 else if (source_mode != MAX_MACHINE_MODE)
3616 fprintf (dump_file, "source_mode_signed = %s\n",
3617 GET_MODE_NAME (source_mode));
3619 fprintf (dump_file, "source_mode_unsigned = %s\n",
3620 GET_MODE_NAME (source_mode_unsigned));
3631 /* Phase 1 top level function.
3632 In this phase the relevancy of all the definitions and uses are checked,
3633 later the webs are produces and the extensions are generated.
3634 These extensions are not emitted yet into the insns stream.
3636 returns true if at list one relevant web was found and there were no
3637 problems, otherwise return false. */
3640 see_propagate_extensions_to_uses (void)
3643 int num_relevant_uses;
3644 int num_relevant_defs;
3648 "* Phase 1: Propagate extensions to uses. *\n");
3650 /* Update the relevancy of references using the DF object. */
3651 see_update_defs_relevancy ();
3652 see_update_uses_relevancy ();
3654 /* Produce the webs and update the extra_info of the root.
3655 In general, a web is relevant if all its definitions and uses are relevant
3656 and there is at least one definition that was marked as SIGN_EXTENDED_DEF
3657 or ZERO_EXTENDED_DEF. */
3658 for (i = 0; i < uses_num; i++)
3659 union_defs (df, DF_USES_GET (df, i), def_entry, use_entry,
3660 see_update_leader_extra_info);
3662 /* Generate use extensions for references and insert these
3663 references to see_bb_splay_ar data structure. */
3664 num_relevant_uses = see_handle_relevant_uses ();
3666 if (num_relevant_uses < 0)
3669 /* Store the def extensions in their references structures and insert these
3670 references to see_bb_splay_ar data structure. */
3671 num_relevant_defs = see_handle_relevant_defs ();
3673 if (num_relevant_defs < 0)
3676 return num_relevant_uses > 0 || num_relevant_defs > 0;
3680 /* Main entry point for the sign extension elimination optimization. */
3688 /* Initialize global data structures. */
3689 see_initialize_data_structures ();
3691 /* Phase 1: Propagate extensions to uses. */
3692 cont = see_propagate_extensions_to_uses ();
3698 /* Phase 2: Merge and eliminate locally redundant extensions. */
3699 see_merge_and_eliminate_extensions ();
3701 /* Phase 3: Eliminate globally redundant extensions. */
3704 /* Phase 4: Commit changes to the insn stream. */
3705 see_commit_changes ();
3709 /* For debug purpose only. */
3710 fprintf (dump_file, "see_pre_extension_hash:\n");
3711 htab_traverse (see_pre_extension_hash, see_print_pre_extension_expr,
3714 for (i = 0; i < last_bb; i++)
3716 if (see_bb_hash_ar[i])
3717 /* Traverse over all the references in the basic block in
3721 "Searching register properties in bb %d\n", i);
3722 htab_traverse (see_bb_hash_ar[i],
3723 see_print_register_properties, NULL);
3729 /* Free global data structures. */
3730 see_free_data_structures ();
3735 gate_handle_see (void)
3737 return optimize > 1 && flag_see;
3741 rest_of_handle_see (void)
3743 int no_new_pseudos_bcp = no_new_pseudos;
3747 no_new_pseudos = no_new_pseudos_bcp;
3749 delete_trivially_dead_insns (get_insns (), max_reg_num ());
3750 update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
3751 (PROP_DEATH_NOTES));
3752 cleanup_cfg (CLEANUP_EXPENSIVE);
3753 reg_scan (get_insns (), max_reg_num ());
3758 struct tree_opt_pass pass_see =
3761 gate_handle_see, /* gate */
3762 rest_of_handle_see, /* execute */
3765 0, /* static_pass_number */
3767 0, /* properties_required */
3768 0, /* properties_provided */
3769 0, /* properties_destroyed */
3770 0, /* todo_flags_start */
3771 TODO_dump_func, /* todo_flags_finish */