/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
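
/* Each basic block's aux field is pointed at one of these structures by
   alloc_aux_for_blocks in move_or_delete_vzeroupper below.  The
   structures are zero-initialized, so every block starts out in the
   "unknown" state (value 0) with all flags clear.  */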

enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};

/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}
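
/* Note how this callback is wired up: note_stores walks every SET and
   CLOBBER in an insn pattern and calls the function with the store
   destination, the enclosing rtx and the opaque DATA pointer.  The scan
   below therefore only has to do

     note_stores (pat, check_avx256_stores, &state);

   and STATE flips to "used" as soon as any insn writes a 256bit AVX
   register or copies one out through SET_SRC.  */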

/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}

/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
        continue;
      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
          break;
        case unused:
          break;
        case used:
          state = used;
          goto done;
        }
    }

  if (seen_unknown)
    state = unknown;

done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
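
/* The three states form a small lattice: "unknown" can still resolve
   either way, while "unused" and "used" are concrete, and a block's
   exit state only ever moves towards "used".  That monotonicity is why
   the helper above requests a rescan only when an exit state turns into
   "used": nothing can later undo that transition, so the iteration in
   move_or_delete_vzeroupper below must terminate.  */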

/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              edge_iterator ei;

              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
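
/* The driver above is a classic worklist dataflow solver (compare the
   propagation loop in df-core.c): two fibonacci heaps keyed by reverse
   completion order hold the blocks of the current round (WORKLIST) and
   of the next round (PENDING), with matching membership bitmaps, and
   the pair is swapped once a round is exhausted.  Visiting blocks in
   reverse completion order settles most predecessors before their
   successors, which keeps the number of rounds small.  */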

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
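
/* For instance, the multiply-start cost for an operand's mode would be
   fetched as

     ix86_cost->mult_init[MODE_INDEX (GET_MODE (op))];

   (mult_init being the array field of the processor_costs struct in
   i386.h; anything wider than DImode falls into the final "other"
   slot, index 4).  */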

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
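
/* Sanity check on the scale: with COSTS_N_INSNS (N) == (N)*4, one add is
   COSTS_N_INSNS (1) == 4 on the speed scale and, being 2 bytes long,
   COSTS_N_BYTES (2) == 4 on the size scale, so the two metrics agree on
   the baseline instruction and the cost tables stay comparable.  */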

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
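
/* A stringop table entry is read as {alg-for-unknown-size, {{max, alg},
   ...}}: each {max, alg} pair selects ALG for known block sizes up to
   MAX bytes, and the list is terminated by a pair with max == -1 that
   covers all remaining sizes.  DUMMY_STRINGOP_ALGS ("always call the
   library") fills the 64bit slot of tables for 32bit-only CPUs.  */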

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),                    /* cost of an add instruction */
  COSTS_N_BYTES (3),                    /* cost of a lea instruction */
  COSTS_N_BYTES (2),                    /* variable shift costs */
  COSTS_N_BYTES (3),                    /* constant shift costs */
  {COSTS_N_BYTES (3),                   /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),                   /* HI */
   COSTS_N_BYTES (3),                   /* SI */
   COSTS_N_BYTES (3),                   /* DI */
   COSTS_N_BYTES (5)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),                   /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),                   /* HI */
   COSTS_N_BYTES (3),                   /* SI */
   COSTS_N_BYTES (3),                   /* DI */
   COSTS_N_BYTES (5)},                  /* other */
  COSTS_N_BYTES (3),                    /* cost of movsx */
  COSTS_N_BYTES (3),                    /* cost of movzx */
  0,                                    /* "large" insn */
  2,                                    /* cost for loading QImode using movzbl */
  {2, 2, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 2},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {2, 2, 2},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  3,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {3, 3},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  3,                                    /* cost of moving SSE register */
  {3, 3, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {3, 3, 3},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of l1 cache */
  0,                                    /* size of l2 cache */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  COSTS_N_BYTES (2),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),                    /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  1,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  1,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (3),                    /* variable shift costs */
  COSTS_N_INSNS (2),                    /* constant shift costs */
  {COSTS_N_INSNS (6),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),                   /* HI */
   COSTS_N_INSNS (6),                   /* SI */
   COSTS_N_INSNS (6),                   /* DI */
   COSTS_N_INSNS (6)},                  /* other */
  COSTS_N_INSNS (1),                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),                  /* HI */
   COSTS_N_INSNS (23),                  /* SI */
   COSTS_N_INSNS (23),                  /* DI */
   COSTS_N_INSNS (23)},                 /* other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  15,                                   /* "large" insn */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of l1 cache */
  0,                                    /* size of l2 cache */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  COSTS_N_INSNS (23),                   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),                   /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),                   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),                  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (3),                    /* variable shift costs */
  COSTS_N_INSNS (2),                    /* constant shift costs */
  {COSTS_N_INSNS (12),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),                  /* HI */
   COSTS_N_INSNS (12),                  /* SI */
   COSTS_N_INSNS (12),                  /* DI */
   COSTS_N_INSNS (12)},                 /* other */
  1,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),                  /* HI */
   COSTS_N_INSNS (40),                  /* SI */
   COSTS_N_INSNS (40),                  /* DI */
   COSTS_N_INSNS (40)},                 /* other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  15,                                   /* "large" insn */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  4,                                    /* size of l1 cache.  486 has 8kB cache
                                           shared for code and data, so 4kB is
                                           not really precise.  */
  4,                                    /* size of l2 cache */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),                   /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (4),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (11),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),                  /* HI */
   COSTS_N_INSNS (11),                  /* SI */
   COSTS_N_INSNS (11),                  /* DI */
   COSTS_N_INSNS (11)},                 /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),                  /* HI */
   COSTS_N_INSNS (25),                  /* SI */
   COSTS_N_INSNS (25),                  /* DI */
   COSTS_N_INSNS (25)},                 /* other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  8,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  8,                                    /* size of l2 cache */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (4),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (4),                   /* SI */
   COSTS_N_INSNS (4),                   /* DI */
   COSTS_N_INSNS (4)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),                  /* HI */
   COSTS_N_INSNS (17),                  /* SI */
   COSTS_N_INSNS (17),                  /* DI */
   COSTS_N_INSNS (17)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  256,                                  /* size of l2 cache */
  32,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  (A worked reading
     of these entries follows this struct.)  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};
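
/* A worked reading of the PentiumPro tables above: a memcpy of unknown
   size uses rep movsl; a memcpy of known size uses an inline loop up to
   128 bytes, an unrolled loop up to 1kB, rep movsl up to 8kB and
   rep movsb beyond that.  The second column of each table is
   DUMMY_STRINGOP_ALGS because this tuning predates 64bit mode.  */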

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (2),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (7),                   /* SI */
   COSTS_N_INSNS (7),                   /* DI */
   COSTS_N_INSNS (7)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),                  /* HI */
   COSTS_N_INSNS (39),                  /* SI */
   COSTS_N_INSNS (39),                  /* DI */
   COSTS_N_INSNS (39)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  1,                                    /* cost for loading QImode using movzbl */
  {1, 1, 1},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {1, 1, 1},                            /* cost of storing integer registers */
  1,                                    /* cost of reg,reg fld/fst */
  {1, 1, 1},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 6, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  1,                                    /* cost of moving MMX register */
  {1, 1},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {1, 1},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  1,                                    /* cost of moving SSE register */
  {1, 1, 1},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {1, 1, 1},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  1,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  128,                                  /* size of l2 cache.  */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),                   /* HI */
   COSTS_N_INSNS (3),                   /* SI */
   COSTS_N_INSNS (3),                   /* DI */
   COSTS_N_INSNS (3)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),                  /* HI */
   COSTS_N_INSNS (18),                  /* SI */
   COSTS_N_INSNS (18),                  /* DI */
   COSTS_N_INSNS (18)},                 /* other */
  COSTS_N_INSNS (2),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  8,                                    /* "large" insn */
  3,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {6, 6, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  6,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  32,                                   /* size of l2 cache.  Some models
                                           have integrated l2 cache, but
                                           optimizing for k6 is not important
                                           enough to worry about that.  */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  COSTS_N_INSNS (2),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (5),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),                   /* HI */
   COSTS_N_INSNS (5),                   /* SI */
   COSTS_N_INSNS (5),                   /* DI */
   COSTS_N_INSNS (5)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /* HI */
   COSTS_N_INSNS (42),                  /* SI */
   COSTS_N_INSNS (74),                  /* DI */
   COSTS_N_INSNS (74)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 4},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  5,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (3),                   /* SI */
   COSTS_N_INSNS (4),                   /* DI */
   COSTS_N_INSNS (5)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /* HI */
   COSTS_N_INSNS (42),                  /* SI */
   COSTS_N_INSNS (74),                  /* DI */
   COSTS_N_INSNS (74)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 3, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                                  /* number of parallel prefetches */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use a loop.  For large blocks, libcall can
     do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar load_cost.  */
  2,                                    /* scalar_store_cost.  */
  5,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  3,                                    /* vec_unalign_load_cost.  */
  3,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  2,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (3),                   /* SI */
   COSTS_N_INSNS (4),                   /* DI */
   COSTS_N_INSNS (5)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),                  /* HI */
   COSTS_N_INSNS (51),                  /* SI */
   COSTS_N_INSNS (83),                  /* DI */
   COSTS_N_INSNS (83)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
  64,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                                  /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar load_cost.  */
  2,                                    /* scalar_store_cost.  */
  6,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  2,                                    /* vec_store_cost.  */
  2,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (4),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (4),                   /* SI */
   COSTS_N_INSNS (6),                   /* DI */
   COSTS_N_INSNS (6)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),                  /* HI */
   COSTS_N_INSNS (51),                  /* SI */
   COSTS_N_INSNS (83),                  /* DI */
   COSTS_N_INSNS (83)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* cost for loading QImode using movzbl */
  {5, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {5, 5, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 4},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 4},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 4},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  2,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
  16,                                   /* size of l1 cache.  */
  2048,                                 /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                                  /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),                   /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                                    /* scalar_stmt_cost.  */
  4,                                    /* scalar load_cost.  */
  4,                                    /* scalar_store_cost.  */
  6,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  4,                                    /* vec_align_load_cost.  */
  4,                                    /* vec_unalign_load_cost.  */
  4,                                    /* vec_store_cost.  */
  2,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (4),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (4),                   /* SI */
   COSTS_N_INSNS (6),                   /* DI */
   COSTS_N_INSNS (6)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),                  /* HI */
   COSTS_N_INSNS (51),                  /* SI */
   COSTS_N_INSNS (83),                  /* DI */
   COSTS_N_INSNS (83)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* cost for loading QImode using movzbl */
  {5, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {5, 5, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 4},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 4},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 4},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  2,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
  16,                                   /* size of l1 cache.  */
  2048,                                 /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                                  /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),                   /* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                                    /* scalar_stmt_cost.  */
  4,                                    /* scalar load_cost.  */
  4,                                    /* scalar_store_cost.  */
  6,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  4,                                    /* vec_align_load_cost.  */
  4,                                    /* vec_unalign_load_cost.  */
  4,                                    /* vec_store_cost.  */
  2,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (3),                   /* SI */
   COSTS_N_INSNS (4),                   /* DI */
   COSTS_N_INSNS (5)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),                  /* HI */
   COSTS_N_INSNS (51),                  /* SI */
   COSTS_N_INSNS (83),                  /* DI */
   COSTS_N_INSNS (83)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
  32,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  100,                                  /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar load_cost.  */
  2,                                    /* scalar_store_cost.  */
  6,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  2,                                    /* vec_store_cost.  */
  2,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (3),                    /* cost of a lea instruction */
  COSTS_N_INSNS (4),                    /* variable shift costs */
  COSTS_N_INSNS (4),                    /* constant shift costs */
  {COSTS_N_INSNS (15),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),                  /* HI */
   COSTS_N_INSNS (15),                  /* SI */
   COSTS_N_INSNS (15),                  /* DI */
   COSTS_N_INSNS (15)},                 /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),                  /* HI */
   COSTS_N_INSNS (56),                  /* SI */
   COSTS_N_INSNS (56),                  /* DI */
   COSTS_N_INSNS (56)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  16,                                   /* "large" insn */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  12,                                   /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  10,                                   /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (5),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),                   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (10),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),                  /* HI */
   COSTS_N_INSNS (10),                  /* SI */
   COSTS_N_INSNS (10),                  /* DI */
   COSTS_N_INSNS (10)},                 /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),                  /* HI */
   COSTS_N_INSNS (66),                  /* SI */
   COSTS_N_INSNS (66),                  /* DI */
   COSTS_N_INSNS (66)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  16,                                   /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  3,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  6,                                    /* cost of moving MMX register */
  {12, 12},                             /* cost of loading MMX registers
                                           in SImode and DImode */
  {12, 12},                             /* cost of storing MMX registers
                                           in SImode and DImode */
  6,                                    /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {12, 12, 12},                         /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  8,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  1024,                                 /* size of l2 cache.  */
  128,                                  /* size of prefetch block */
  8,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),                   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};
1655 struct processor_costs atom_cost = {
1656 COSTS_N_INSNS (1), /* cost of an add instruction */
1657 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1658 COSTS_N_INSNS (1), /* variable shift costs */
1659 COSTS_N_INSNS (1), /* constant shift costs */
1660 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1661 COSTS_N_INSNS (4), /* HI */
1662 COSTS_N_INSNS (3), /* SI */
1663 COSTS_N_INSNS (4), /* DI */
1664 COSTS_N_INSNS (2)}, /* other */
1665 0, /* cost of multiply per each bit set */
1666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1667 COSTS_N_INSNS (26), /* HI */
1668 COSTS_N_INSNS (42), /* SI */
1669 COSTS_N_INSNS (74), /* DI */
1670 COSTS_N_INSNS (74)}, /* other */
1671 COSTS_N_INSNS (1), /* cost of movsx */
1672 COSTS_N_INSNS (1), /* cost of movzx */
1673 8, /* "large" insn */
1674 17, /* MOVE_RATIO */
1675 2, /* cost for loading QImode using movzbl */
1676 {4, 4, 4}, /* cost of loading integer registers
1677 in QImode, HImode and SImode.
1678 Relative to reg-reg move (2). */
1679 {4, 4, 4}, /* cost of storing integer registers */
1680 4, /* cost of reg,reg fld/fst */
1681 {12, 12, 12}, /* cost of loading fp registers
1682 in SFmode, DFmode and XFmode */
1683 {6, 6, 8}, /* cost of storing fp registers
1684 in SFmode, DFmode and XFmode */
1685 2, /* cost of moving MMX register */
1686 {8, 8}, /* cost of loading MMX registers
1687 in SImode and DImode */
1688 {8, 8}, /* cost of storing MMX registers
1689 in SImode and DImode */
1690 2, /* cost of moving SSE register */
1691 {8, 8, 8}, /* cost of loading SSE registers
1692 in SImode, DImode and TImode */
1693 {8, 8, 8}, /* cost of storing SSE registers
1694 in SImode, DImode and TImode */
1695 5, /* MMX or SSE register to integer */
1696 32, /* size of l1 cache. */
1697 256, /* size of l2 cache. */
1698 64, /* size of prefetch block */
1699 6, /* number of parallel prefetches */
1700 3, /* Branch cost */
1701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1702 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1703 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1704 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1705 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1706 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1707 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1708 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1709 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1710 {{libcall, {{8, loop}, {15, unrolled_loop},
1711 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1712 {libcall, {{24, loop}, {32, unrolled_loop},
1713 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1714 1, /* scalar_stmt_cost. */
1715 1, /* scalar load_cost. */
1716 1, /* scalar_store_cost. */
1717 1, /* vec_stmt_cost. */
1718 1, /* vec_to_scalar_cost. */
1719 1, /* scalar_to_vec_cost. */
1720 1, /* vec_align_load_cost. */
1721 2, /* vec_unalign_load_cost. */
1722 1, /* vec_store_cost. */
1723 3, /* cond_taken_branch_cost. */
1724 1, /* cond_not_taken_branch_cost. */
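/* Illustrative sketch (assumption, not part of the original file): how a
   stringop_algs table like the memcpy/memset entries above is consulted.
   Each {max, alg} pair applies to block sizes up to MAX bytes, and
   max == -1 terminates the list while covering all larger sizes; the
   real lookup logic lives in decide_alg.  */
#if 0
static enum stringop_alg
example_pick_alg (const struct stringop_algs *algs, HOST_WIDE_INT size)
{
  unsigned int i;
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1
	|| size <= (HOST_WIDE_INT) algs->size[i].max)
      return algs->size[i].alg;
  return libcall;
}
#endif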
1727 /* Generic64 should produce code tuned for Nocona and K8. */
1729 struct processor_costs generic64_cost = {
1730 COSTS_N_INSNS (1), /* cost of an add instruction */
1731 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1732 this cost, however, our current implementation of synth_mult results in
1733 the use of unnecessary temporary registers, causing regressions on several
1734 SPECfp benchmarks. */
1735 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1736 COSTS_N_INSNS (1), /* variable shift costs */
1737 COSTS_N_INSNS (1), /* constant shift costs */
1738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1739 COSTS_N_INSNS (4), /* HI */
1740 COSTS_N_INSNS (3), /* SI */
1741 COSTS_N_INSNS (4), /* DI */
1742 COSTS_N_INSNS (2)}, /* other */
1743 0, /* cost of multiply per each bit set */
1744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1745 COSTS_N_INSNS (26), /* HI */
1746 COSTS_N_INSNS (42), /* SI */
1747 COSTS_N_INSNS (74), /* DI */
1748 COSTS_N_INSNS (74)}, /* other */
1749 COSTS_N_INSNS (1), /* cost of movsx */
1750 COSTS_N_INSNS (1), /* cost of movzx */
1751 8, /* "large" insn */
1752 17, /* MOVE_RATIO */
1753 4, /* cost for loading QImode using movzbl */
1754 {4, 4, 4}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {4, 4, 4}, /* cost of storing integer registers */
1758 4, /* cost of reg,reg fld/fst */
1759 {12, 12, 12}, /* cost of loading fp registers
1760 in SFmode, DFmode and XFmode */
1761 {6, 6, 8}, /* cost of storing fp registers
1762 in SFmode, DFmode and XFmode */
1763 2, /* cost of moving MMX register */
1764 {8, 8}, /* cost of loading MMX registers
1765 in SImode and DImode */
1766 {8, 8}, /* cost of storing MMX registers
1767 in SImode and DImode */
1768 2, /* cost of moving SSE register */
1769 {8, 8, 8}, /* cost of loading SSE registers
1770 in SImode, DImode and TImode */
1771 {8, 8, 8}, /* cost of storing SSE registers
1772 in SImode, DImode and TImode */
1773 5, /* MMX or SSE register to integer */
1774 32, /* size of l1 cache. */
1775 512, /* size of l2 cache. */
1776 64, /* size of prefetch block */
1777 6, /* number of parallel prefetches */
1778 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1779 value is increased to the perhaps more appropriate value of 5. */
1780 3, /* Branch cost */
1781 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1782 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1783 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1784 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1785 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1786 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1787 {DUMMY_STRINGOP_ALGS,
1788 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1789 {DUMMY_STRINGOP_ALGS,
1790 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1791 1, /* scalar_stmt_cost. */
1792 1, /* scalar load_cost. */
1793 1, /* scalar_store_cost. */
1794 1, /* vec_stmt_cost. */
1795 1, /* vec_to_scalar_cost. */
1796 1, /* scalar_to_vec_cost. */
1797 1, /* vec_align_load_cost. */
1798 2, /* vec_unalign_load_cost. */
1799 1, /* vec_store_cost. */
1800 3, /* cond_taken_branch_cost. */
1801 1, /* cond_not_taken_branch_cost. */
1804 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1805 Athlon and K8. */
1807 struct processor_costs generic32_cost = {
1808 COSTS_N_INSNS (1), /* cost of an add instruction */
1809 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1810 COSTS_N_INSNS (1), /* variable shift costs */
1811 COSTS_N_INSNS (1), /* constant shift costs */
1812 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1813 COSTS_N_INSNS (4), /* HI */
1814 COSTS_N_INSNS (3), /* SI */
1815 COSTS_N_INSNS (4), /* DI */
1816 COSTS_N_INSNS (2)}, /* other */
1817 0, /* cost of multiply per each bit set */
1818 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1819 COSTS_N_INSNS (26), /* HI */
1820 COSTS_N_INSNS (42), /* SI */
1821 COSTS_N_INSNS (74), /* DI */
1822 COSTS_N_INSNS (74)}, /* other */
1823 COSTS_N_INSNS (1), /* cost of movsx */
1824 COSTS_N_INSNS (1), /* cost of movzx */
1825 8, /* "large" insn */
1826 17, /* MOVE_RATIO */
1827 4, /* cost for loading QImode using movzbl */
1828 {4, 4, 4}, /* cost of loading integer registers
1829 in QImode, HImode and SImode.
1830 Relative to reg-reg move (2). */
1831 {4, 4, 4}, /* cost of storing integer registers */
1832 4, /* cost of reg,reg fld/fst */
1833 {12, 12, 12}, /* cost of loading fp registers
1834 in SFmode, DFmode and XFmode */
1835 {6, 6, 8}, /* cost of storing fp registers
1836 in SFmode, DFmode and XFmode */
1837 2, /* cost of moving MMX register */
1838 {8, 8}, /* cost of loading MMX registers
1839 in SImode and DImode */
1840 {8, 8}, /* cost of storing MMX registers
1841 in SImode and DImode */
1842 2, /* cost of moving SSE register */
1843 {8, 8, 8}, /* cost of loading SSE registers
1844 in SImode, DImode and TImode */
1845 {8, 8, 8}, /* cost of storing SSE registers
1846 in SImode, DImode and TImode */
1847 5, /* MMX or SSE register to integer */
1848 32, /* size of l1 cache. */
1849 256, /* size of l2 cache. */
1850 64, /* size of prefetch block */
1851 6, /* number of parallel prefetches */
1852 3, /* Branch cost */
1853 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1854 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1855 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1856 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1857 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1858 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1859 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1860 DUMMY_STRINGOP_ALGS},
1861 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1862 DUMMY_STRINGOP_ALGS},
1863 1, /* scalar_stmt_cost. */
1864 1, /* scalar load_cost. */
1865 1, /* scalar_store_cost. */
1866 1, /* vec_stmt_cost. */
1867 1, /* vec_to_scalar_cost. */
1868 1, /* scalar_to_vec_cost. */
1869 1, /* vec_align_load_cost. */
1870 2, /* vec_unalign_load_cost. */
1871 1, /* vec_store_cost. */
1872 3, /* cond_taken_branch_cost. */
1873 1, /* cond_not_taken_branch_cost. */
1876 const struct processor_costs *ix86_cost = &pentium_cost;
1878 /* Processor feature/optimization bitmasks. */
1879 #define m_386 (1<<PROCESSOR_I386)
1880 #define m_486 (1<<PROCESSOR_I486)
1881 #define m_PENT (1<<PROCESSOR_PENTIUM)
1882 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1883 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1884 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1885 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1886 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1887 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1888 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1889 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1890 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1891 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1892 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1893 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1894 #define m_ATOM (1<<PROCESSOR_ATOM)
1896 #define m_GEODE (1<<PROCESSOR_GEODE)
1897 #define m_K6 (1<<PROCESSOR_K6)
1898 #define m_K6_GEODE (m_K6 | m_GEODE)
1899 #define m_K8 (1<<PROCESSOR_K8)
1900 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1903 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1904 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1905 #define m_BDVER (m_BDVER1 | m_BDVER2)
1906 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1907 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1909 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1910 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1912 /* Generic instruction choice should be the common subset of supported CPUs
1913 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1914 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
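/* Worked example (illustrative): testing whether a tuning applies to the
   active processor is a single AND against these masks.  For -mtune=atom,
   (1u << PROCESSOR_ATOM) == m_ATOM, so a table entry such as
   (m_386 | m_P4_NOCONA | m_CORE2I7 | m_ATOM) & m_ATOM is nonzero and the
   corresponding X86_TUNE_* feature below comes out enabled.  */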
1916 /* Feature tests against the various tunings. */
1917 unsigned char ix86_tune_features[X86_TUNE_LAST];
1919 /* Feature tests against the various tunings used to create ix86_tune_features
1920 based on the processor mask. */
1921 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1922 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1923 negatively, so enabling it for Generic64 seems like a good code-size
1924 tradeoff. We can't enable it for 32bit generic because it does not
1925 work well with PPro-based chips. */
1926 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1928 /* X86_TUNE_PUSH_MEMORY */
1929 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1931 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1934 /* X86_TUNE_UNROLL_STRLEN */
1935 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1937 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1938 on simulation results. But after P4 was made, no performance benefit
1939 was observed with branch hints; they also increase the code size.
1940 As a result, icc never generates branch hints. */
1943 /* X86_TUNE_DOUBLE_WITH_ADD */
1946 /* X86_TUNE_USE_SAHF */
1947 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1949 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1950 partial dependencies. */
1951 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1953 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1954 register stalls on the Generic32 compilation setting as well. However,
1955 in the current implementation partial register stalls are not eliminated
1956 very well - they can be introduced via subregs synthesized by combine
1957 and can happen in caller/callee saving sequences. Because this option
1958 pays back little on PPro-based chips and conflicts with the partial
1959 register dependencies used by Athlon/P4-based chips, it is better to
1960 leave it off for generic32 for now. */
1963 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1964 m_CORE2I7 | m_GENERIC,
1966 /* X86_TUNE_USE_HIMODE_FIOP */
1967 m_386 | m_486 | m_K6_GEODE,
1969 /* X86_TUNE_USE_SIMODE_FIOP */
1970 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1972 /* X86_TUNE_USE_MOV0 */
1975 /* X86_TUNE_USE_CLTD */
1976 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1978 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1981 /* X86_TUNE_SPLIT_LONG_MOVES */
1984 /* X86_TUNE_READ_MODIFY_WRITE */
1987 /* X86_TUNE_READ_MODIFY */
1990 /* X86_TUNE_PROMOTE_QIMODE */
1991 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1993 /* X86_TUNE_FAST_PREFIX */
1994 ~(m_386 | m_486 | m_PENT),
1996 /* X86_TUNE_SINGLE_STRINGOP */
1997 m_386 | m_P4_NOCONA,
1999 /* X86_TUNE_QIMODE_MATH */
2002 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2003 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2004 might be considered for Generic32 if our scheme for avoiding partial
2005 stalls was more effective. */
2008 /* X86_TUNE_PROMOTE_QI_REGS */
2011 /* X86_TUNE_PROMOTE_HI_REGS */
2014 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2015 over esp addition. */
2016 m_386 | m_486 | m_PENT | m_PPRO,
2018 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2019 over esp addition. */
2022 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2023 over esp subtraction. */
2024 m_386 | m_486 | m_PENT | m_K6_GEODE,
2026 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2027 over esp subtraction. */
2028 m_PENT | m_K6_GEODE,
2030 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2031 for DFmode copies */
2032 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2034 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2035 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2037 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2038 conflict here between PPro/Pentium4-based chips that treat 128bit
2039 SSE registers as single units and K8-based chips that divide SSE
2040 registers into two 64bit halves. This knob promotes all store destinations
2041 to be 128bit to allow register renaming on 128bit SSE units, but usually
2042 results in one extra microop on 64bit SSE units. Experimental results
2043 show that disabling this option on P4 brings over a 20% SPECfp regression,
2044 while enabling it on K8 brings roughly a 2.4% regression that can be partly
2045 masked by careful scheduling of moves. */
2046 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2048 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2049 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2051 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2054 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2057 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2058 are resolved on SSE register parts instead of whole registers, so we may
2059 maintain just lower part of scalar values in proper format leaving the
2060 upper part undefined. */
2063 /* X86_TUNE_SSE_TYPELESS_STORES */
2066 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2067 m_PPRO | m_P4_NOCONA,
2069 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2070 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2072 /* X86_TUNE_PROLOGUE_USING_MOVE */
2073 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2075 /* X86_TUNE_EPILOGUE_USING_MOVE */
2076 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2078 /* X86_TUNE_SHIFT1 */
2081 /* X86_TUNE_USE_FFREEP */
2084 /* X86_TUNE_INTER_UNIT_MOVES */
2085 ~(m_AMD_MULTIPLE | m_GENERIC),
2087 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2088 ~(m_AMDFAM10 | m_BDVER),
2090 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2091 than 4 branch instructions in the 16 byte window. */
2092 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2094 /* X86_TUNE_SCHEDULE */
2095 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2097 /* X86_TUNE_USE_BT */
2098 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2100 /* X86_TUNE_USE_INCDEC */
2101 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2103 /* X86_TUNE_PAD_RETURNS */
2104 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2106 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */
2109 /* X86_TUNE_EXT_80387_CONSTANTS */
2110 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2112 /* X86_TUNE_SHORTEN_X87_SSE */
2115 /* X86_TUNE_AVOID_VECTOR_DECODE */
2116 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2118 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2119 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
2122 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2123 vector path on AMD machines. */
2124 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2126 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2127 machines. */
2128 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2130 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2131 than a MOV. */
2134 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2135 but one byte longer. */
2138 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2139 operand that cannot be represented using a modRM byte. The XOR
2140 replacement is long decoded, so this split helps here as well. */
2143 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2144 from FP to FP. */
2145 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2147 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2148 from integer to FP. */
2151 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2152 with a subsequent conditional jump instruction into a single
2153 compare-and-branch uop. */
2156 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2157 will impact LEA instruction selection. */
2160 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2161 instructions. */
2164 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2165 at -O3. For the moment, the prefetching seems badly tuned for Intel
2166 chips. */
2167 m_K6_GEODE | m_AMD_MULTIPLE,
2169 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2170 the auto-vectorizer. */
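/* Consumption sketch (assumption, mirroring i386.h): each entry above is
   folded into ix86_tune_features[] at option-override time and then read
   through a TARGET_* convenience macro, e.g.

     #define TARGET_USE_LEAVE ix86_tune_features[X86_TUNE_USE_LEAVE]

   so conditional code elsewhere simply tests TARGET_USE_LEAVE.  */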
2174 /* Feature tests against the various architecture variations. */
2175 unsigned char ix86_arch_features[X86_ARCH_LAST];
2177 /* Feature tests against the various architecture variations, used to create
2178 ix86_arch_features based on the processor mask. */
2179 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2180 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2181 ~(m_386 | m_486 | m_PENT | m_K6),
2183 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2186 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2189 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2192 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2196 static const unsigned int x86_accumulate_outgoing_args
2197 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2199 static const unsigned int x86_arch_always_fancy_math_387
2200 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2202 static const unsigned int x86_avx256_split_unaligned_load
2203 = m_COREI7 | m_GENERIC;
2205 static const unsigned int x86_avx256_split_unaligned_store
2206 = m_COREI7 | m_BDVER | m_GENERIC;
2208 /* In case the average insn count for a single function invocation is
2209 lower than this constant, emit fast (but longer) prologue and
2210 epilogue code. */
2211 #define FAST_PROLOGUE_INSN_COUNT 20
2213 /* Names for the 8-bit low, 8-bit high, and 16-bit registers, respectively. */
2214 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2215 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2216 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2218 /* Array of the smallest class containing reg number REGNO, indexed by
2219 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2221 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2223 /* ax, dx, cx, bx */
2224 AREG, DREG, CREG, BREG,
2225 /* si, di, bp, sp */
2226 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2228 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2229 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2232 /* flags, fpsr, fpcr, frame */
2233 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2235 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2238 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2241 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2242 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2243 /* SSE REX registers */
2244 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
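/* Usage note (illustrative): REGNO_REG_CLASS in i386.h simply indexes this
   table, so e.g. regclass_map[AX_REG] is AREG and regclass_map[SP_REG] is
   NON_Q_REGS, matching the rows above.  */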
2248 /* The "default" register map used in 32bit mode. */
2250 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2252 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2253 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2254 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2255 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2256 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2257 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2258 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2261 /* The "default" register map used in 64bit mode. */
2263 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2265 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2266 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2267 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2268 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2269 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2270 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2271 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2274 /* Define the register numbers to be used in Dwarf debugging information.
2275 The SVR4 reference port C compiler uses the following register numbers
2276 in its Dwarf output code:
2277 0 for %eax (gcc regno = 0)
2278 1 for %ecx (gcc regno = 2)
2279 2 for %edx (gcc regno = 1)
2280 3 for %ebx (gcc regno = 3)
2281 4 for %esp (gcc regno = 7)
2282 5 for %ebp (gcc regno = 6)
2283 6 for %esi (gcc regno = 4)
2284 7 for %edi (gcc regno = 5)
2285 The following three DWARF register numbers are never generated by
2286 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2287 believes these numbers have these meanings.
2288 8 for %eip (no gcc equivalent)
2289 9 for %eflags (gcc regno = 17)
2290 10 for %trapno (no gcc equivalent)
2291 It is not at all clear how we should number the FP stack registers
2292 for the x86 architecture. If the version of SDB on x86/svr4 were
2293 a bit less brain dead with respect to floating-point then we would
2294 have a precedent to follow with respect to DWARF register numbers
2295 for x86 FP registers, but the SDB on x86/svr4 is so completely
2296 broken with respect to FP registers that it is hardly worth thinking
2297 of it as something to strive for compatibility with.
2298 The version of x86/svr4 SDB I have at the moment does (partially)
2299 seem to believe that DWARF register number 11 is associated with
2300 the x86 register %st(0), but that's about all. Higher DWARF
2301 register numbers don't seem to be associated with anything in
2302 particular, and even for DWARF regno 11, SDB only seems to under-
2303 stand that it should say that a variable lives in %st(0) (when
2304 asked via an `=' command) if we said it was in DWARF regno 11,
2305 but SDB still prints garbage when asked for the value of the
2306 variable in question (via a `/' command).
2307 (Also note that the labels SDB prints for various FP stack regs
2308 when doing an `x' command are all wrong.)
2309 Note that these problems generally don't affect the native SVR4
2310 C compiler because it doesn't allow the use of -O with -g and
2311 because when it is *not* optimizing, it allocates a memory
2312 location for each floating-point variable, and the memory
2313 location is what gets described in the DWARF AT_location
2314 attribute for the variable in question.
2315 Regardless of the severe mental illness of the x86/svr4 SDB, we
2316 do something sensible here and we use the following DWARF
2317 register numbers. Note that these are all stack-top-relative
2318 numbers.
2319 11 for %st(0) (gcc regno = 8)
2320 12 for %st(1) (gcc regno = 9)
2321 13 for %st(2) (gcc regno = 10)
2322 14 for %st(3) (gcc regno = 11)
2323 15 for %st(4) (gcc regno = 12)
2324 16 for %st(5) (gcc regno = 13)
2325 17 for %st(6) (gcc regno = 14)
2326 18 for %st(7) (gcc regno = 15)
2328 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2330 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2331 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2332 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2333 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2334 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2335 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2336 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
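/* Sanity example (illustrative): the SVR4 numbering above swaps %ecx and
   %edx relative to gcc's internal order, i.e.
     svr4_dbx_register_map[1] == 2   (gcc regno 1 is %edx -> DWARF 2)
     svr4_dbx_register_map[2] == 1   (gcc regno 2 is %ecx -> DWARF 1)
   exactly as the comment before the table documents.  */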
2339 /* Define parameter passing and return registers. */
2341 static int const x86_64_int_parameter_registers[6] =
2343 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2346 static int const x86_64_ms_abi_int_parameter_registers[4] =
2348 CX_REG, DX_REG, R8_REG, R9_REG
2351 static int const x86_64_int_return_registers[4] =
2353 AX_REG, DX_REG, DI_REG, SI_REG
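/* Example (illustrative): under the SysV AMD64 ABI a call f (a, b, c)
   passes the first three integer arguments in %rdi, %rsi and %rdx, i.e.
   x86_64_int_parameter_registers[0..2]; the MS ABI variant above has only
   four integer argument registers, starting with %rcx.  */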
2356 /* Define the structure for the machine field in struct function. */
2358 struct GTY(()) stack_local_entry {
2359 unsigned short mode;
2362 struct stack_local_entry *next;
2365 /* Structure describing stack frame layout.
2366 Stack grows downward:
2372 saved static chain if ix86_static_chain_on_stack
2374 saved frame pointer if frame_pointer_needed
2375 <- HARD_FRAME_POINTER
2381 <- sse_regs_save_offset
2384 [va_arg registers] |
2388 [padding2] | = to_allocate
2397 int outgoing_arguments_size;
2398 HOST_WIDE_INT frame;
2400 /* The offsets relative to ARG_POINTER. */
2401 HOST_WIDE_INT frame_pointer_offset;
2402 HOST_WIDE_INT hard_frame_pointer_offset;
2403 HOST_WIDE_INT stack_pointer_offset;
2404 HOST_WIDE_INT hfp_save_offset;
2405 HOST_WIDE_INT reg_save_offset;
2406 HOST_WIDE_INT sse_reg_save_offset;
2408 /* When save_regs_using_mov is set, emit prologue using
2409 move instead of push instructions. */
2410 bool save_regs_using_mov;
2413 /* Which cpu are we scheduling for. */
2414 enum attr_cpu ix86_schedule;
2416 /* Which cpu are we optimizing for. */
2417 enum processor_type ix86_tune;
2419 /* Which instruction set architecture to use. */
2420 enum processor_type ix86_arch;
2422 /* True if the SSE prefetch instruction is not a NOP. */
2423 int x86_prefetch_sse;
2425 /* -mstackrealign option */
2426 static const char ix86_force_align_arg_pointer_string[]
2427 = "force_align_arg_pointer";
2429 static rtx (*ix86_gen_leave) (void);
2430 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2431 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2432 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2433 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2434 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2435 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2436 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2437 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2438 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2440 /* Preferred alignment for stack boundary in bits. */
2441 unsigned int ix86_preferred_stack_boundary;
2443 /* Alignment for incoming stack boundary in bits specified at
2444 command line. */
2445 static unsigned int ix86_user_incoming_stack_boundary;
2447 /* Default alignment for incoming stack boundary in bits. */
2448 static unsigned int ix86_default_incoming_stack_boundary;
2450 /* Alignment for incoming stack boundary in bits. */
2451 unsigned int ix86_incoming_stack_boundary;
2453 /* Calling-ABI-specific va_list type nodes. */
2454 static GTY(()) tree sysv_va_list_type_node;
2455 static GTY(()) tree ms_va_list_type_node;
2457 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2458 char internal_label_prefix[16];
2459 int internal_label_prefix_len;
2461 /* Fence to use after loop using movnt. */
2464 /* Register class used for passing a given 64bit part of the argument.
2465 These represent classes as documented by the psABI, with the exception
2466 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
2467 just uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2469 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2470 whenever possible (the upper half does contain padding). */
2471 enum x86_64_reg_class
2474 X86_64_INTEGER_CLASS,
2475 X86_64_INTEGERSI_CLASS,
2482 X86_64_COMPLEX_X87_CLASS,
2486 #define MAX_CLASSES 4
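/* Classification example (illustrative, following the psABI): a structure
   such as struct { double d; long l; } occupies two eightbytes; the first
   is classified as an SSE class (passed in %xmm0) and the second as
   X86_64_INTEGER_CLASS (passed in %rdi).  MAX_CLASSES == 4 covers
   register-passed arguments of up to 32 bytes.  */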
2488 /* Table of constants used by fldpi, fldln2, etc.... */
2489 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2490 static bool ext_80387_constants_init = 0;
2493 static struct machine_function * ix86_init_machine_status (void);
2494 static rtx ix86_function_value (const_tree, const_tree, bool);
2495 static bool ix86_function_value_regno_p (const unsigned int);
2496 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2498 static rtx ix86_static_chain (const_tree, bool);
2499 static int ix86_function_regparm (const_tree, const_tree);
2500 static void ix86_compute_frame_layout (struct ix86_frame *);
2501 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2503 static void ix86_add_new_builtins (HOST_WIDE_INT);
2504 static rtx ix86_expand_vec_perm_builtin (tree);
2505 static tree ix86_canonical_va_list_type (tree);
2506 static void predict_jump (int);
2507 static unsigned int split_stack_prologue_scratch_regno (void);
2508 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2510 enum ix86_function_specific_strings
2512 IX86_FUNCTION_SPECIFIC_ARCH,
2513 IX86_FUNCTION_SPECIFIC_TUNE,
2514 IX86_FUNCTION_SPECIFIC_MAX
2517 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2518 const char *, enum fpmath_unit, bool);
2519 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2520 static void ix86_function_specific_save (struct cl_target_option *);
2521 static void ix86_function_specific_restore (struct cl_target_option *);
2522 static void ix86_function_specific_print (FILE *, int,
2523 struct cl_target_option *);
2524 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2525 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2526 struct gcc_options *);
2527 static bool ix86_can_inline_p (tree, tree);
2528 static void ix86_set_current_function (tree);
2529 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2531 static enum calling_abi ix86_function_abi (const_tree);
2534 #ifndef SUBTARGET32_DEFAULT_CPU
2535 #define SUBTARGET32_DEFAULT_CPU "i386"
2538 /* The svr4 ABI for the i386 says that records and unions are returned
2539 in memory. */
2540 #ifndef DEFAULT_PCC_STRUCT_RETURN
2541 #define DEFAULT_PCC_STRUCT_RETURN 1
2544 /* Whether -mtune= or -march= were specified */
2545 static int ix86_tune_defaulted;
2546 static int ix86_arch_specified;
2548 /* Vectorization library interface and handlers. */
2549 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2551 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2552 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2554 /* Processor target table, indexed by processor number */
2555 struct ptt
2556 {
2557 const struct processor_costs *cost; /* Processor costs */
2558 const int align_loop; /* Default alignments. */
2559 const int align_loop_max_skip;
2560 const int align_jump;
2561 const int align_jump_max_skip;
2562 const int align_func;
2565 static const struct ptt processor_target_table[PROCESSOR_max] =
2567 {&i386_cost, 4, 3, 4, 3, 4},
2568 {&i486_cost, 16, 15, 16, 15, 16},
2569 {&pentium_cost, 16, 7, 16, 7, 16},
2570 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2571 {&geode_cost, 0, 0, 0, 0, 0},
2572 {&k6_cost, 32, 7, 32, 7, 32},
2573 {&athlon_cost, 16, 7, 16, 7, 16},
2574 {&pentium4_cost, 0, 0, 0, 0, 0},
2575 {&k8_cost, 16, 7, 16, 7, 16},
2576 {&nocona_cost, 0, 0, 0, 0, 0},
2577 /* Core 2 32-bit. */
2578 {&generic32_cost, 16, 10, 16, 10, 16},
2579 /* Core 2 64-bit. */
2580 {&generic64_cost, 16, 10, 16, 10, 16},
2581 /* Core i7 32-bit. */
2582 {&generic32_cost, 16, 10, 16, 10, 16},
2583 /* Core i7 64-bit. */
2584 {&generic64_cost, 16, 10, 16, 10, 16},
2585 {&generic32_cost, 16, 7, 16, 7, 16},
2586 {&generic64_cost, 16, 10, 16, 10, 16},
2587 {&amdfam10_cost, 32, 24, 32, 7, 32},
2588 {&bdver1_cost, 32, 24, 32, 7, 32},
2589 {&bdver2_cost, 32, 24, 32, 7, 32},
2590 {&btver1_cost, 32, 24, 32, 7, 32},
2591 {&atom_cost, 16, 7, 16, 7, 16}
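/* Example (illustrative): with -mtune=k8 the row {&k8_cost, 16, 7, 16, 7, 16}
   above supplies the defaults picked up later in
   ix86_option_override_internal: align_loops = 16 with a max skip of 7,
   align_jumps = 16 and align_functions = 16, unless the user overrides
   them on the command line.  */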
2594 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2624 /* Return true if a red-zone is in use. */
2627 ix86_using_red_zone (void)
2629 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2632 /* Return a string that documents the current -m options. The caller is
2633 responsible for freeing the string. */
2636 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2637 const char *tune, enum fpmath_unit fpmath,
2640 struct ix86_target_opts
2642 const char *option; /* option string */
2643 HOST_WIDE_INT mask; /* isa mask options */
2646 /* This table is ordered so that options like -msse4.2, which imply
2647 preceding options, are matched first. */
2648 static struct ix86_target_opts isa_opts[] =
2650 { "-m64", OPTION_MASK_ISA_64BIT },
2651 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2652 { "-mfma", OPTION_MASK_ISA_FMA },
2653 { "-mxop", OPTION_MASK_ISA_XOP },
2654 { "-mlwp", OPTION_MASK_ISA_LWP },
2655 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2656 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2657 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2658 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2659 { "-msse3", OPTION_MASK_ISA_SSE3 },
2660 { "-msse2", OPTION_MASK_ISA_SSE2 },
2661 { "-msse", OPTION_MASK_ISA_SSE },
2662 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2663 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2664 { "-mmmx", OPTION_MASK_ISA_MMX },
2665 { "-mabm", OPTION_MASK_ISA_ABM },
2666 { "-mbmi", OPTION_MASK_ISA_BMI },
2667 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2668 { "-mtbm", OPTION_MASK_ISA_TBM },
2669 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2670 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2671 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2672 { "-maes", OPTION_MASK_ISA_AES },
2673 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2674 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2675 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2676 { "-mf16c", OPTION_MASK_ISA_F16C },
2680 static struct ix86_target_opts flag_opts[] =
2682 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2683 { "-m80387", MASK_80387 },
2684 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2685 { "-malign-double", MASK_ALIGN_DOUBLE },
2686 { "-mcld", MASK_CLD },
2687 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2688 { "-mieee-fp", MASK_IEEE_FP },
2689 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2690 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2691 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2692 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2693 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2694 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2695 { "-mno-red-zone", MASK_NO_RED_ZONE },
2696 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2697 { "-mrecip", MASK_RECIP },
2698 { "-mrtd", MASK_RTD },
2699 { "-msseregparm", MASK_SSEREGPARM },
2700 { "-mstack-arg-probe", MASK_STACK_PROBE },
2701 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2702 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2703 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2704 { "-mvzeroupper", MASK_VZEROUPPER },
2705 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2706 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2707 { "-mprefer-avx128", MASK_PREFER_AVX128},
2710 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2713 char target_other[40];
2722 memset (opts, '\0', sizeof (opts));
2724 /* Add -march= option. */
2727 opts[num][0] = "-march=";
2728 opts[num++][1] = arch;
2731 /* Add -mtune= option. */
2734 opts[num][0] = "-mtune=";
2735 opts[num++][1] = tune;
2738 /* Pick out the options in isa options. */
2739 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2741 if ((isa & isa_opts[i].mask) != 0)
2743 opts[num++][0] = isa_opts[i].option;
2744 isa &= ~ isa_opts[i].mask;
2748 if (isa && add_nl_p)
2750 opts[num++][0] = isa_other;
2751 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2755 /* Add flag options. */
2756 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2758 if ((flags & flag_opts[i].mask) != 0)
2760 opts[num++][0] = flag_opts[i].option;
2761 flags &= ~ flag_opts[i].mask;
2765 if (flags && add_nl_p)
2767 opts[num++][0] = target_other;
2768 sprintf (target_other, "(other flags: %#x)", flags);
2771 /* Add -fpmath= option. */
2774 opts[num][0] = "-mfpmath=";
2775 switch ((int) fpmath)
2778 opts[num++][1] = "387";
2782 opts[num++][1] = "sse";
2785 case FPMATH_387 | FPMATH_SSE:
2786 opts[num++][1] = "sse+387";
2798 gcc_assert (num < ARRAY_SIZE (opts));
2800 /* Size the string. */
2802 sep_len = (add_nl_p) ? 3 : 1;
2803 for (i = 0; i < num; i++)
2806 for (j = 0; j < 2; j++)
2808 len += strlen (opts[i][j]);
2811 /* Build the string. */
2812 ret = ptr = (char *) xmalloc (len);
2815 for (i = 0; i < num; i++)
2819 for (j = 0; j < 2; j++)
2820 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2827 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2835 for (j = 0; j < 2; j++)
2838 memcpy (ptr, opts[i][j], len2[j]);
2840 line_len += len2[j];
2845 gcc_assert (ret + len >= ptr);
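/* Example result (illustrative): for -m64 -march=core2 -mfpmath=sse the
   returned string looks roughly like
     "-march=core2 -mtune=core2 -m64 -mssse3 -msse3 -msse2 -msse -mmmx -mfpmath=sse"
   with options emitted in table order; the caller frees the string.  */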
2850 /* Return true if profiling code should be emitted before the
2851 prologue, and false otherwise.
2852 Note: for x86 with "hotfix" this case is rejected with sorry (). */
2854 ix86_profile_before_prologue (void)
2856 return flag_fentry != 0;
2859 /* Function that is callable from the debugger to print the current
2862 ix86_debug_options (void)
2864 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2865 ix86_arch_string, ix86_tune_string,
2870 fprintf (stderr, "%s\n\n", opts);
2874 fputs ("<no options>\n\n", stderr);
2879 /* Override various settings based on options. If MAIN_ARGS_P, the
2880 options are from the command line, otherwise they are from
2881 attributes. */
2884 ix86_option_override_internal (bool main_args_p)
2887 unsigned int ix86_arch_mask, ix86_tune_mask;
2888 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2893 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2894 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2895 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2896 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2897 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2898 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2899 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2900 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2901 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2902 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2903 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2904 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2905 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2906 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2907 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2908 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2909 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2910 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2911 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2912 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2913 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2914 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2915 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2916 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2917 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2918 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2919 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2920 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2921 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2922 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2923 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2924 /* If this reaches 64, we need to widen the struct pta flags below. */
2928 const char *const name; /* processor name or nickname. */
2929 const enum processor_type processor;
2930 const enum attr_cpu schedule;
2931 const unsigned HOST_WIDE_INT flags;
2933 const processor_alias_table[] =
2935 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2936 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2937 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2938 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2939 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2940 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2941 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2942 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2943 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2944 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2945 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2946 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2947 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2949 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2951 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2952 PTA_MMX | PTA_SSE | PTA_SSE2},
2953 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2954 PTA_MMX | PTA_SSE | PTA_SSE2},
2955 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2956 PTA_MMX | PTA_SSE | PTA_SSE2},
2957 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2958 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2959 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2960 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2961 | PTA_CX16 | PTA_NO_SAHF},
2962 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2963 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2964 | PTA_SSSE3 | PTA_CX16},
2965 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2966 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2967 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2968 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2969 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2970 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2971 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2972 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2973 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2974 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2975 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2976 | PTA_RDRND | PTA_F16C},
2977 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
2978 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2979 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
2980 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2981 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_LZCNT | PTA_FMA
2983 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2984 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2985 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2986 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2987 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2988 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2989 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2990 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2991 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2992 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2993 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2994 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2995 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2996 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2997 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2998 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2999 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3000 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3001 {"x86-64", PROCESSOR_K8, CPU_K8,
3002 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3003 {"k8", PROCESSOR_K8, CPU_K8,
3004 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3005 | PTA_SSE2 | PTA_NO_SAHF},
3006 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3007 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3008 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3009 {"opteron", PROCESSOR_K8, CPU_K8,
3010 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3011 | PTA_SSE2 | PTA_NO_SAHF},
3012 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3013 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3014 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3015 {"athlon64", PROCESSOR_K8, CPU_K8,
3016 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3017 | PTA_SSE2 | PTA_NO_SAHF},
3018 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3019 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3020 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3021 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3022 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3023 | PTA_SSE2 | PTA_NO_SAHF},
3024 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3025 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3026 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3027 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3028 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3029 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3030 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3031 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3032 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3033 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3034 | PTA_XOP | PTA_LWP},
3035 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3036 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3037 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3038 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3039 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3041 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3042 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3043 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3044 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3045 0 /* flags are only used for -march switch. */ },
3046 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3047 PTA_64BIT /* flags are only used for -march switch. */ },
3050 int const pta_size = ARRAY_SIZE (processor_alias_table);
3052 /* Set up prefix/suffix so the error messages refer to either the command
3053 line argument, or the attribute(target). */
3062 prefix = "option(\"";
3067 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3068 SUBTARGET_OVERRIDE_OPTIONS;
3071 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3072 SUBSUBTARGET_OVERRIDE_OPTIONS;
3076 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3078 /* -fPIC is the default for x86_64. */
3079 if (TARGET_MACHO && TARGET_64BIT)
3082 /* Need to check -mtune=generic first. */
3083 if (ix86_tune_string)
3085 if (!strcmp (ix86_tune_string, "generic")
3086 || !strcmp (ix86_tune_string, "i686")
3087 /* As special support for cross compilers we read -mtune=native
3088 as -mtune=generic. With native compilers we won't see the
3089 -mtune=native, as it was changed by the driver. */
3090 || !strcmp (ix86_tune_string, "native"))
3093 ix86_tune_string = "generic64";
3095 ix86_tune_string = "generic32";
3097 /* If this call is for setting the option attribute, allow the
3098 generic32/generic64 that was previously set. */
3099 else if (!main_args_p
3100 && (!strcmp (ix86_tune_string, "generic32")
3101 || !strcmp (ix86_tune_string, "generic64")))
3103 else if (!strncmp (ix86_tune_string, "generic", 7))
3104 error ("bad value (%s) for %stune=%s %s",
3105 ix86_tune_string, prefix, suffix, sw);
3106 else if (!strcmp (ix86_tune_string, "x86-64"))
3107 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3108 "%stune=k8%s or %stune=generic%s instead as appropriate",
3109 prefix, suffix, prefix, suffix, prefix, suffix);
3113 if (ix86_arch_string)
3114 ix86_tune_string = ix86_arch_string;
3115 if (!ix86_tune_string)
3117 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3118 ix86_tune_defaulted = 1;
3121 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3122 need to use a sensible tune option. */
3123 if (!strcmp (ix86_tune_string, "generic")
3124 || !strcmp (ix86_tune_string, "x86-64")
3125 || !strcmp (ix86_tune_string, "i686"))
3128 ix86_tune_string = "generic64";
3130 ix86_tune_string = "generic32";
3134 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3136 /* rep; movq isn't available in 32-bit code. */
3137 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3138 ix86_stringop_alg = no_stringop;
3141 if (!ix86_arch_string)
3142 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3144 ix86_arch_specified = 1;
3146 if (!global_options_set.x_ix86_abi)
3147 ix86_abi = DEFAULT_ABI;
3149 if (global_options_set.x_ix86_cmodel)
3151 switch (ix86_cmodel)
3156 ix86_cmodel = CM_SMALL_PIC;
3158 error ("code model %qs not supported in the %s bit mode",
3165 ix86_cmodel = CM_MEDIUM_PIC;
3167 error ("code model %qs not supported in the %s bit mode",
3169 else if (TARGET_X32)
3170 error ("code model %qs not supported in x32 mode",
3177 ix86_cmodel = CM_LARGE_PIC;
3179 error ("code model %qs not supported in the %s bit mode",
3181 else if (TARGET_X32)
3182 error ("code model %qs not supported in x32 mode",
3188 error ("code model %s does not support PIC mode", "32");
3190 error ("code model %qs not supported in the %s bit mode",
3197 error ("code model %s does not support PIC mode", "kernel");
3198 ix86_cmodel = CM_32;
3201 error ("code model %qs not supported in the %s bit mode",
3211 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3212 use of rip-relative addressing. This eliminates fixups that
3213 would otherwise be needed if this object is to be placed in a
3214 DLL, and is essentially just as efficient as direct addressing. */
3215 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3216 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3217 else if (TARGET_64BIT)
3218 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3220 ix86_cmodel = CM_32;
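/* Worked defaults (illustrative): on 64-bit SysV targets plain compilation
   yields CM_SMALL, -fpic upgrades it to CM_SMALL_PIC, the 64-bit MS ABI
   forces CM_SMALL_PIC with flag_pic set, and 32-bit compilation always
   ends up with CM_32, matching the cascade just above.  */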
3222 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3224 error ("-masm=intel not supported in this configuration");
3225 ix86_asm_dialect = ASM_ATT;
3227 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3228 sorry ("%i-bit mode not compiled in",
3229 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3231 for (i = 0; i < pta_size; i++)
3232 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3234 ix86_schedule = processor_alias_table[i].schedule;
3235 ix86_arch = processor_alias_table[i].processor;
3236 /* Default cpu tuning to the architecture. */
3237 ix86_tune = ix86_arch;
3239 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3240 error ("CPU you selected does not support x86-64 "
3243 if (processor_alias_table[i].flags & PTA_MMX
3244 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3245 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3246 if (processor_alias_table[i].flags & PTA_3DNOW
3247 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3248 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3249 if (processor_alias_table[i].flags & PTA_3DNOW_A
3250 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3251 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3252 if (processor_alias_table[i].flags & PTA_SSE
3253 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3254 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3255 if (processor_alias_table[i].flags & PTA_SSE2
3256 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3257 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3258 if (processor_alias_table[i].flags & PTA_SSE3
3259 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3260 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3261 if (processor_alias_table[i].flags & PTA_SSSE3
3262 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3263 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3264 if (processor_alias_table[i].flags & PTA_SSE4_1
3265 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3266 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3267 if (processor_alias_table[i].flags & PTA_SSE4_2
3268 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3269 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3270 if (processor_alias_table[i].flags & PTA_AVX
3271 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3272 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3273 if (processor_alias_table[i].flags & PTA_AVX2
3274 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3275 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3276 if (processor_alias_table[i].flags & PTA_FMA
3277 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3278 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3279 if (processor_alias_table[i].flags & PTA_SSE4A
3280 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3281 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3282 if (processor_alias_table[i].flags & PTA_FMA4
3283 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3284 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3285 if (processor_alias_table[i].flags & PTA_XOP
3286 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3287 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3288 if (processor_alias_table[i].flags & PTA_LWP
3289 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3290 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3291 if (processor_alias_table[i].flags & PTA_ABM
3292 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3293 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3294 if (processor_alias_table[i].flags & PTA_BMI
3295 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3296 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3297 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3298 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3299 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3300 if (processor_alias_table[i].flags & PTA_TBM
3301 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3302 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3303 if (processor_alias_table[i].flags & PTA_CX16
3304 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3305 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3306 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3307 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3308 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3309 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3310 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3311 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3312 if (processor_alias_table[i].flags & PTA_MOVBE
3313 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3314 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3315 if (processor_alias_table[i].flags & PTA_AES
3316 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3317 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3318 if (processor_alias_table[i].flags & PTA_PCLMUL
3319 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3320 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3321 if (processor_alias_table[i].flags & PTA_FSGSBASE
3322 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3323 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3324 if (processor_alias_table[i].flags & PTA_RDRND
3325 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3326 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3327 if (processor_alias_table[i].flags & PTA_F16C
3328 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3329 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3330 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3331 x86_prefetch_sse = true;
3336 if (!strcmp (ix86_arch_string, "generic"))
3337 error ("generic CPU can be used only for %stune=%s %s",
3338 prefix, suffix, sw);
3339 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3340 error ("bad value (%s) for %sarch=%s %s",
3341 ix86_arch_string, prefix, suffix, sw);
3343 ix86_arch_mask = 1u << ix86_arch;
3344 for (i = 0; i < X86_ARCH_LAST; ++i)
3345 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
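/* Example (illustrative): for -march=pentium, ix86_arch_mask is m_PENT,
   which the X86_ARCH_CMOVE entry excludes, so
   ix86_arch_features[X86_ARCH_CMOVE] becomes false and cmov is not
   generated for that -march.  */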
3347 for (i = 0; i < pta_size; i++)
3348 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3350 ix86_schedule = processor_alias_table[i].schedule;
3351 ix86_tune = processor_alias_table[i].processor;
3354 if (!(processor_alias_table[i].flags & PTA_64BIT))
3356 if (ix86_tune_defaulted)
3358 ix86_tune_string = "x86-64";
3359 for (i = 0; i < pta_size; i++)
3360 if (! strcmp (ix86_tune_string,
3361 processor_alias_table[i].name))
3363 ix86_schedule = processor_alias_table[i].schedule;
3364 ix86_tune = processor_alias_table[i].processor;
3367 error ("CPU you selected does not support x86-64 "
3373 /* Adjust tuning when compiling for 32-bit ABI. */
3376 case PROCESSOR_GENERIC64:
3377 ix86_tune = PROCESSOR_GENERIC32;
3378 ix86_schedule = CPU_PENTIUMPRO;
3381 case PROCESSOR_CORE2_64:
3382 ix86_tune = PROCESSOR_CORE2_32;
3385 case PROCESSOR_COREI7_64:
3386 ix86_tune = PROCESSOR_COREI7_32;
3393 /* Intel CPUs have always interpreted SSE prefetch instructions as
3394 NOPs; so, we can enable SSE prefetch instructions even when
3395 -mtune (rather than -march) points us to a processor that has them.
3396 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3397 higher processors. */
3399 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3400 x86_prefetch_sse = true;
3404 if (ix86_tune_specified && i == pta_size)
3405 error ("bad value (%s) for %stune=%s %s",
3406 ix86_tune_string, prefix, suffix, sw);
3408 ix86_tune_mask = 1u << ix86_tune;
3409 for (i = 0; i < X86_TUNE_LAST; ++i)
3410 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
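/* Worked example: if ix86_tune happened to be 5, ix86_tune_mask would be
   1u << 5 == 0x20, and tuning feature I is enabled exactly when bit 5 is
   set in initial_ix86_tune_features[I].  */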
3412 #ifndef USE_IX86_FRAME_POINTER
3413 #define USE_IX86_FRAME_POINTER 0
3416 #ifndef USE_X86_64_FRAME_POINTER
3417 #define USE_X86_64_FRAME_POINTER 0
3420 /* Set the default values for switches whose default depends on TARGET_64BIT
3421 in case they weren't overwritten by command line options. */
3424 if (optimize > 1 && !global_options_set.x_flag_zee)
3426 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3427 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3428 if (flag_asynchronous_unwind_tables == 2)
3429 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3430 if (flag_pcc_struct_return == 2)
3431 flag_pcc_struct_return = 0;
3435 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3436 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3437 if (flag_asynchronous_unwind_tables == 2)
3438 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3439 if (flag_pcc_struct_return == 2)
3440 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3444 ix86_cost = &ix86_size_cost;
3446 ix86_cost = processor_target_table[ix86_tune].cost;
3448 /* Arrange to set up i386_stack_locals for all functions. */
3449 init_machine_status = ix86_init_machine_status;
3451 /* Validate -mregparm= value. */
3452 if (global_options_set.x_ix86_regparm)
3455 warning (0, "-mregparm is ignored in 64-bit mode");
3456 if (ix86_regparm > REGPARM_MAX)
3458 error ("-mregparm=%d is not between 0 and %d",
3459 ix86_regparm, REGPARM_MAX);
3464 ix86_regparm = REGPARM_MAX;
3466 /* Default align_* from the processor table. */
3467 if (align_loops == 0)
3469 align_loops = processor_target_table[ix86_tune].align_loop;
3470 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3472 if (align_jumps == 0)
3474 align_jumps = processor_target_table[ix86_tune].align_jump;
3475 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3477 if (align_functions == 0)
3479 align_functions = processor_target_table[ix86_tune].align_func;
3482 /* Provide default for -mbranch-cost= value. */
3483 if (!global_options_set.x_ix86_branch_cost)
3484 ix86_branch_cost = ix86_cost->branch_cost;
3488 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3490 /* Enable by default the SSE and MMX builtins. Do allow the user to
3491 explicitly disable any of these. In particular, disabling SSE and
3492 MMX for kernel code is extremely useful. */
3493 if (!ix86_arch_specified)
3495 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3496 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3499 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3503 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3505 if (!ix86_arch_specified)
3507 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3509 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3510 when the programmer takes care to keep the stack from being destroyed. */
3511 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3512 target_flags |= MASK_NO_RED_ZONE;
3515 /* Keep nonleaf frame pointers. */
3516 if (flag_omit_frame_pointer)
3517 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3518 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3519 flag_omit_frame_pointer = 1;
3521 /* If we're doing fast math, we don't care about comparison order
3522 wrt NaNs. This lets us use a shorter comparison sequence. */
3523 if (flag_finite_math_only)
3524 target_flags &= ~MASK_IEEE_FP;
3526 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3527 since the insns won't need emulation. */
3528 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3529 target_flags &= ~MASK_NO_FANCY_MATH_387;
3531 /* Likewise, if the target doesn't have a 387, or we've specified
3532 software floating point, don't use 387 inline intrinsics. */
3534 target_flags |= MASK_NO_FANCY_MATH_387;
3536 /* Turn on MMX builtins for -msse. */
3539 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3540 x86_prefetch_sse = true;
3543 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3544 if (TARGET_SSE4_2 || TARGET_ABM)
3545 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3547 /* Turn on lzcnt instruction for -mabm. */
3549 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3551 /* Validate -mpreferred-stack-boundary= value or default it to
3552 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3553 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3554 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3556 int min = (TARGET_64BIT ? 4 : 2);
3557 int max = (TARGET_SEH ? 4 : 12);
3559 if (ix86_preferred_stack_boundary_arg < min
3560 || ix86_preferred_stack_boundary_arg > max)
3563 error ("-mpreferred-stack-boundary is not supported for this target");
3566 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3567 ix86_preferred_stack_boundary_arg, min, max);
3570 ix86_preferred_stack_boundary
3571 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
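/* Worked example: -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT == 16 * 8 == 128 bits, i.e. the 16-byte
   stack alignment the x86-64 psABI expects.  */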
3574 /* Set the default value for -mstackrealign. */
3575 if (ix86_force_align_arg_pointer == -1)
3576 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3578 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3580 /* Validate -mincoming-stack-boundary= value or default it to
3581 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3582 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3583 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3585 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3586 || ix86_incoming_stack_boundary_arg > 12)
3587 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3588 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3591 ix86_user_incoming_stack_boundary
3592 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3593 ix86_incoming_stack_boundary
3594 = ix86_user_incoming_stack_boundary;
3598 /* Accept -msseregparm only if at least SSE support is enabled. */
3599 if (TARGET_SSEREGPARM
3601 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3603 if (global_options_set.x_ix86_fpmath)
3605 if (ix86_fpmath & FPMATH_SSE)
3609 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3610 ix86_fpmath = FPMATH_387;
3612 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3614 warning (0, "387 instruction set disabled, using SSE arithmetic");
3615 ix86_fpmath = FPMATH_SSE;
3620 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3622 /* If the i387 is disabled, then do not return values in it. */
3624 target_flags &= ~MASK_FLOAT_RETURNS;
3626 /* Use an external vectorized library when vectorizing intrinsics. */
3627 if (global_options_set.x_ix86_veclibabi_type)
3628 switch (ix86_veclibabi_type)
3630 case ix86_veclibabi_type_svml:
3631 ix86_veclib_handler = ix86_veclibabi_svml;
3634 case ix86_veclibabi_type_acml:
3635 ix86_veclib_handler = ix86_veclibabi_acml;
3642 if ((!USE_IX86_FRAME_POINTER
3643 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3644 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3646 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3648 /* ??? Unwind info is not correct around the CFG unless either a frame
3649 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3650 unwind info generation to be aware of the CFG and propagating states around edges. */
3652 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3653 || flag_exceptions || flag_non_call_exceptions)
3654 && flag_omit_frame_pointer
3655 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3657 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3658 warning (0, "unwind tables currently require either a frame pointer "
3659 "or %saccumulate-outgoing-args%s for correctness",
3661 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3664 /* If stack probes are required, the space used for large function
3665 arguments on the stack must also be probed, so enable
3666 -maccumulate-outgoing-args so this happens in the prologue. */
3667 if (TARGET_STACK_PROBE
3668 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3670 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3671 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3672 "for correctness", prefix, suffix);
3673 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3676 /* For sane SSE instruction set generation we need the fcomi instruction.
3677 It is safe to enable all CMOV instructions. Also, the RDRAND intrinsic
3678 expands to a sequence that includes a conditional move. */
3679 if (TARGET_SSE || TARGET_RDRND)
3682 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3685 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3686 p = strchr (internal_label_prefix, 'X');
3687 internal_label_prefix_len = p - internal_label_prefix;
3691 /* When the scheduling description is not available, disable the scheduler
3692 pass so it won't slow down compilation and make x87 code slower. */
3693 if (!TARGET_SCHEDULE)
3694 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3696 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3697 ix86_cost->simultaneous_prefetches,
3698 global_options.x_param_values,
3699 global_options_set.x_param_values);
3700 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3701 global_options.x_param_values,
3702 global_options_set.x_param_values);
3703 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3704 global_options.x_param_values,
3705 global_options_set.x_param_values);
3706 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3707 global_options.x_param_values,
3708 global_options_set.x_param_values);
3710 /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial. */
3711 if (flag_prefetch_loop_arrays < 0
3714 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3715 flag_prefetch_loop_arrays = 1;
3717 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3718 can be optimized to ap = __builtin_next_arg (0). */
3719 if (!TARGET_64BIT && !flag_split_stack)
3720 targetm.expand_builtin_va_start = NULL;
3724 ix86_gen_leave = gen_leave_rex64;
3725 ix86_gen_add3 = gen_adddi3;
3726 ix86_gen_sub3 = gen_subdi3;
3727 ix86_gen_sub3_carry = gen_subdi3_carry;
3728 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3729 ix86_gen_monitor = gen_sse3_monitor64;
3730 ix86_gen_andsp = gen_anddi3;
3731 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3732 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3733 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3737 ix86_gen_leave = gen_leave;
3738 ix86_gen_add3 = gen_addsi3;
3739 ix86_gen_sub3 = gen_subsi3;
3740 ix86_gen_sub3_carry = gen_subsi3_carry;
3741 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3742 ix86_gen_monitor = gen_sse3_monitor;
3743 ix86_gen_andsp = gen_andsi3;
3744 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3745 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3746 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
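/* These pointers let later code emit the right pattern without testing
   TARGET_64BIT at every call site.  An illustrative use (the operands
   here are made up) is

     emit_insn (ix86_gen_add3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (-UNITS_PER_WORD)));

   which expands to adddi3 in 64-bit mode and addsi3 in 32-bit mode.  */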
3750 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3752 target_flags |= MASK_CLD & ~target_flags_explicit;
3755 if (!TARGET_64BIT && flag_pic)
3757 if (flag_fentry > 0)
3758 sorry ("-mfentry isn%'t supported for 32-bit in combination with -fpic");
3762 else if (TARGET_SEH)
3764 if (flag_fentry == 0)
3765 sorry ("-mno-fentry isn%'t compatible with SEH");
3768 else if (flag_fentry < 0)
3770 #if defined(PROFILE_BEFORE_PROLOGUE)
3779 /* When not optimizing for size, enable the vzeroupper optimization for
3780 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3781 AVX unaligned load/store. */
3784 if (flag_expensive_optimizations
3785 && !(target_flags_explicit & MASK_VZEROUPPER))
3786 target_flags |= MASK_VZEROUPPER;
3787 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3788 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3789 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3790 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3791 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3792 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3793 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3794 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3795 target_flags |= MASK_PREFER_AVX128;
3800 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3801 target_flags &= ~MASK_VZEROUPPER;
3804 /* Save the initial options in case the user does function specific options. */
3807 target_option_default_node = target_option_current_node
3808 = build_target_option_node ();
3811 /* Return TRUE if VAL is passed in a register with 256bit AVX modes. */
3814 function_pass_avx256_p (const_rtx val)
3819 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3822 if (GET_CODE (val) == PARALLEL)
3827 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3829 r = XVECEXP (val, 0, i);
3830 if (GET_CODE (r) == EXPR_LIST
3832 && REG_P (XEXP (r, 0))
3833 && (GET_MODE (XEXP (r, 0)) == OImode
3834 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3842 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3845 ix86_option_override (void)
3847 ix86_option_override_internal (true);
3850 /* Update register usage after having seen the compiler flags. */
3853 ix86_conditional_register_usage (void)
3858 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3860 if (fixed_regs[i] > 1)
3861 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3862 if (call_used_regs[i] > 1)
3863 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3866 /* The PIC register, if it exists, is fixed. */
3867 j = PIC_OFFSET_TABLE_REGNUM;
3868 if (j != INVALID_REGNUM)
3869 fixed_regs[j] = call_used_regs[j] = 1;
3871 /* The 64-bit MS_ABI changes the set of call-used registers. */
3872 if (TARGET_64BIT_MS_ABI)
3874 call_used_regs[SI_REG] = 0;
3875 call_used_regs[DI_REG] = 0;
3876 call_used_regs[XMM6_REG] = 0;
3877 call_used_regs[XMM7_REG] = 0;
3878 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3879 call_used_regs[i] = 0;
3882 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3883 other call-clobbered regs for 64-bit. */
3886 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3888 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3889 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3890 && call_used_regs[i])
3891 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3894 /* If MMX is disabled, squash the registers. */
3896 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3897 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3898 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3900 /* If SSE is disabled, squash the registers. */
3902 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3903 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3904 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3906 /* If the FPU is disabled, squash the registers. */
3907 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3908 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3909 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3910 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3912 /* If 32-bit, squash the 64-bit registers. */
3915 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3917 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3923 /* Save the current options */
3926 ix86_function_specific_save (struct cl_target_option *ptr)
3928 ptr->arch = ix86_arch;
3929 ptr->schedule = ix86_schedule;
3930 ptr->tune = ix86_tune;
3931 ptr->branch_cost = ix86_branch_cost;
3932 ptr->tune_defaulted = ix86_tune_defaulted;
3933 ptr->arch_specified = ix86_arch_specified;
3934 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3935 ptr->ix86_target_flags_explicit = target_flags_explicit;
3937 /* The fields are char but the variables are not; make sure the
3938 values fit in the fields. */
3939 gcc_assert (ptr->arch == ix86_arch);
3940 gcc_assert (ptr->schedule == ix86_schedule);
3941 gcc_assert (ptr->tune == ix86_tune);
3942 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3945 /* Restore the current options */
3948 ix86_function_specific_restore (struct cl_target_option *ptr)
3950 enum processor_type old_tune = ix86_tune;
3951 enum processor_type old_arch = ix86_arch;
3952 unsigned int ix86_arch_mask, ix86_tune_mask;
3955 ix86_arch = (enum processor_type) ptr->arch;
3956 ix86_schedule = (enum attr_cpu) ptr->schedule;
3957 ix86_tune = (enum processor_type) ptr->tune;
3958 ix86_branch_cost = ptr->branch_cost;
3959 ix86_tune_defaulted = ptr->tune_defaulted;
3960 ix86_arch_specified = ptr->arch_specified;
3961 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
3962 target_flags_explicit = ptr->ix86_target_flags_explicit;
3964 /* Recreate the arch feature tests if the arch changed */
3965 if (old_arch != ix86_arch)
3967 ix86_arch_mask = 1u << ix86_arch;
3968 for (i = 0; i < X86_ARCH_LAST; ++i)
3969 ix86_arch_features[i]
3970 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3973 /* Recreate the tune optimization tests */
3974 if (old_tune != ix86_tune)
3976 ix86_tune_mask = 1u << ix86_tune;
3977 for (i = 0; i < X86_TUNE_LAST; ++i)
3978 ix86_tune_features[i]
3979 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3983 /* Print the current options */
3986 ix86_function_specific_print (FILE *file, int indent,
3987 struct cl_target_option *ptr)
3990 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3991 NULL, NULL, ptr->x_ix86_fpmath, false);
3993 fprintf (file, "%*sarch = %d (%s)\n",
3996 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3997 ? cpu_names[ptr->arch]
4000 fprintf (file, "%*stune = %d (%s)\n",
4003 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4004 ? cpu_names[ptr->tune]
4007 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4011 fprintf (file, "%*s%s\n", indent, "", target_string);
4012 free (target_string);
4017 /* Inner function to process the attribute((target(...))): take an argument and
4018 set the current options from the argument. If we have a list, recursively go over the list. */
4022 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4023 struct gcc_options *enum_opts_set)
4028 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4029 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4030 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4031 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4032 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4048 enum ix86_opt_type type;
4053 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4054 IX86_ATTR_ISA ("abm", OPT_mabm),
4055 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4056 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4057 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4058 IX86_ATTR_ISA ("aes", OPT_maes),
4059 IX86_ATTR_ISA ("avx", OPT_mavx),
4060 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4061 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4062 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4063 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4064 IX86_ATTR_ISA ("sse", OPT_msse),
4065 IX86_ATTR_ISA ("sse2", OPT_msse2),
4066 IX86_ATTR_ISA ("sse3", OPT_msse3),
4067 IX86_ATTR_ISA ("sse4", OPT_msse4),
4068 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4069 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4070 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4071 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4072 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4073 IX86_ATTR_ISA ("xop", OPT_mxop),
4074 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4075 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4076 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4077 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4080 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4082 /* string options */
4083 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4084 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4087 IX86_ATTR_YES ("cld",
4091 IX86_ATTR_NO ("fancy-math-387",
4092 OPT_mfancy_math_387,
4093 MASK_NO_FANCY_MATH_387),
4095 IX86_ATTR_YES ("ieee-fp",
4099 IX86_ATTR_YES ("inline-all-stringops",
4100 OPT_minline_all_stringops,
4101 MASK_INLINE_ALL_STRINGOPS),
4103 IX86_ATTR_YES ("inline-stringops-dynamically",
4104 OPT_minline_stringops_dynamically,
4105 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4107 IX86_ATTR_NO ("align-stringops",
4108 OPT_mno_align_stringops,
4109 MASK_NO_ALIGN_STRINGOPS),
4111 IX86_ATTR_YES ("recip",
4117 /* If this is a list, recurse to get the options. */
4118 if (TREE_CODE (args) == TREE_LIST)
4122 for (; args; args = TREE_CHAIN (args))
4123 if (TREE_VALUE (args)
4124 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4125 p_strings, enum_opts_set))
4131 else if (TREE_CODE (args) != STRING_CST)
4134 /* Handle multiple arguments separated by commas. */
4135 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4137 while (next_optstr && *next_optstr != '\0')
4139 char *p = next_optstr;
4141 char *comma = strchr (next_optstr, ',');
4142 const char *opt_string;
4143 size_t len, opt_len;
4148 enum ix86_opt_type type = ix86_opt_unknown;
4154 len = comma - next_optstr;
4155 next_optstr = comma + 1;
4163 /* Recognize no-xxx. */
4164 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4173 /* Find the option. */
4176 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4178 type = attrs[i].type;
4179 opt_len = attrs[i].len;
4180 if (ch == attrs[i].string[0]
4181 && ((type != ix86_opt_str && type != ix86_opt_enum)
4184 && memcmp (p, attrs[i].string, opt_len) == 0)
4187 mask = attrs[i].mask;
4188 opt_string = attrs[i].string;
4193 /* Process the option. */
4196 error ("attribute(target(\"%s\")) is unknown", orig_p);
4200 else if (type == ix86_opt_isa)
4202 struct cl_decoded_option decoded;
4204 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4205 ix86_handle_option (&global_options, &global_options_set,
4206 &decoded, input_location);
4209 else if (type == ix86_opt_yes || type == ix86_opt_no)
4211 if (type == ix86_opt_no)
4212 opt_set_p = !opt_set_p;
4215 target_flags |= mask;
4217 target_flags &= ~mask;
4220 else if (type == ix86_opt_str)
4224 error ("option(\"%s\") was already specified", opt_string);
4228 p_strings[opt] = xstrdup (p + opt_len);
4231 else if (type == ix86_opt_enum)
4236 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4238 set_option (&global_options, enum_opts_set, opt, value,
4239 p + opt_len, DK_UNSPECIFIED, input_location,
4243 error ("attribute(target(\"%s\")) is unknown", orig_p);
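/* Illustrative use of the strings recognized above: ISA names toggle
   OPTION_MASK_ISA_* bits, a "no-" prefix negates, "arch="/"tune=" take
   string arguments, and "fpmath=" is an enumeration, e.g.

     int foo (void)
       __attribute__ ((target ("sse4.2,no-fancy-math-387,arch=core2")));  */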
4255 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4258 ix86_valid_target_attribute_tree (tree args)
4260 const char *orig_arch_string = ix86_arch_string;
4261 const char *orig_tune_string = ix86_tune_string;
4262 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4263 int orig_tune_defaulted = ix86_tune_defaulted;
4264 int orig_arch_specified = ix86_arch_specified;
4265 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4268 struct cl_target_option *def
4269 = TREE_TARGET_OPTION (target_option_default_node);
4270 struct gcc_options enum_opts_set;
4272 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4274 /* Process each of the options on the chain. */
4275 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4279 /* If the changed options are different from the default, rerun
4280 ix86_option_override_internal, and then save the options away.
4281 The string options are attribute options, and will be undone
4282 when we copy the save structure. */
4283 if (ix86_isa_flags != def->x_ix86_isa_flags
4284 || target_flags != def->x_target_flags
4285 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4286 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4287 || enum_opts_set.x_ix86_fpmath)
4289 /* If we are using the default tune= or arch=, undo the string assigned,
4290 and use the default. */
4291 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4292 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4293 else if (!orig_arch_specified)
4294 ix86_arch_string = NULL;
4296 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4297 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4298 else if (orig_tune_defaulted)
4299 ix86_tune_string = NULL;
4301 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4302 if (enum_opts_set.x_ix86_fpmath)
4303 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4304 else if (!TARGET_64BIT && TARGET_SSE)
4306 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4307 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4310 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4311 ix86_option_override_internal (false);
4313 /* Add any builtin functions with the new isa if any. */
4314 ix86_add_new_builtins (ix86_isa_flags);
4316 /* Save the current options unless we are validating options for #pragma. */
4318 t = build_target_option_node ();
4320 ix86_arch_string = orig_arch_string;
4321 ix86_tune_string = orig_tune_string;
4322 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4324 /* Free up memory allocated to hold the strings */
4325 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4326 free (option_strings[i]);
4332 /* Hook to validate attribute((target("string"))). */
4335 ix86_valid_target_attribute_p (tree fndecl,
4336 tree ARG_UNUSED (name),
4338 int ARG_UNUSED (flags))
4340 struct cl_target_option cur_target;
4342 tree old_optimize = build_optimization_node ();
4343 tree new_target, new_optimize;
4344 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4346 /* If the function changed the optimization levels as well as setting target
4347 options, start with the optimizations specified. */
4348 if (func_optimize && func_optimize != old_optimize)
4349 cl_optimization_restore (&global_options,
4350 TREE_OPTIMIZATION (func_optimize));
4352 /* The target attributes may also change some optimization flags, so update
4353 the optimization options if necessary. */
4354 cl_target_option_save (&cur_target, &global_options);
4355 new_target = ix86_valid_target_attribute_tree (args);
4356 new_optimize = build_optimization_node ();
4363 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4365 if (old_optimize != new_optimize)
4366 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4369 cl_target_option_restore (&global_options, &cur_target);
4371 if (old_optimize != new_optimize)
4372 cl_optimization_restore (&global_options,
4373 TREE_OPTIMIZATION (old_optimize));
4379 /* Hook to determine if one function can safely inline another. */
4382 ix86_can_inline_p (tree caller, tree callee)
4385 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4386 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4388 /* If callee has no option attributes, then it is ok to inline. */
4392 /* If the caller has no option attributes, but the callee does, then it is not ok to inline. */
4394 else if (!caller_tree)
4399 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4400 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4402 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
4403 function can inline an SSE2 function, but an SSE2 function can't inline an SSE4 function. */
4405 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4406 != callee_opts->x_ix86_isa_flags)
4409 /* See if we have the same non-isa options. */
4410 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4413 /* See if arch, tune, etc. are the same. */
4414 else if (caller_opts->arch != callee_opts->arch)
4417 else if (caller_opts->tune != callee_opts->tune)
4420 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4423 else if (caller_opts->branch_cost != callee_opts->branch_cost)
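/* Worked example of the ISA subset test above: with caller bits 0b0111
   (say SSE|SSE2|SSE3) and callee bits 0b0011 (SSE|SSE2),
   (0b0111 & 0b0011) == 0b0011 and inlining is allowed; with the roles
   swapped the AND loses a bit and inlining is refused.  */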
4434 /* Remember the last target of ix86_set_current_function. */
4435 static GTY(()) tree ix86_previous_fndecl;
4437 /* Establish appropriate back-end context for processing the function
4438 FNDECL. The argument might be NULL to indicate processing at top
4439 level, outside of any function scope. */
4441 ix86_set_current_function (tree fndecl)
4443 /* Only change the context if the function changes. This hook is called
4444 several times in the course of compiling a function, and we don't want to
4445 slow things down too much or call target_reinit when it isn't safe. */
4446 if (fndecl && fndecl != ix86_previous_fndecl)
4448 tree old_tree = (ix86_previous_fndecl
4449 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4452 tree new_tree = (fndecl
4453 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4456 ix86_previous_fndecl = fndecl;
4457 if (old_tree == new_tree)
4462 cl_target_option_restore (&global_options,
4463 TREE_TARGET_OPTION (new_tree));
4469 struct cl_target_option *def
4470 = TREE_TARGET_OPTION (target_option_current_node);
4472 cl_target_option_restore (&global_options, def);
4479 /* Return true if this goes in large data/bss. */
4482 ix86_in_large_data_p (tree exp)
4484 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4487 /* Functions are never large data. */
4488 if (TREE_CODE (exp) == FUNCTION_DECL)
4491 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4493 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4494 if (strcmp (section, ".ldata") == 0
4495 || strcmp (section, ".lbss") == 0)
4501 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4503 /* If this is an incomplete type with size 0, then we can't put it
4504 in data because it might be too big when completed. */
4505 if (!size || size > ix86_section_threshold)
4512 /* Switch to the appropriate section for output of DECL.
4513 DECL is either a `VAR_DECL' node or a constant of some sort.
4514 RELOC indicates whether forming the initial value of DECL requires
4515 link-time relocations. */
4517 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4521 x86_64_elf_select_section (tree decl, int reloc,
4522 unsigned HOST_WIDE_INT align)
4524 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4525 && ix86_in_large_data_p (decl))
4527 const char *sname = NULL;
4528 unsigned int flags = SECTION_WRITE;
4529 switch (categorize_decl_for_section (decl, reloc))
4534 case SECCAT_DATA_REL:
4535 sname = ".ldata.rel";
4537 case SECCAT_DATA_REL_LOCAL:
4538 sname = ".ldata.rel.local";
4540 case SECCAT_DATA_REL_RO:
4541 sname = ".ldata.rel.ro";
4543 case SECCAT_DATA_REL_RO_LOCAL:
4544 sname = ".ldata.rel.ro.local";
4548 flags |= SECTION_BSS;
4551 case SECCAT_RODATA_MERGE_STR:
4552 case SECCAT_RODATA_MERGE_STR_INIT:
4553 case SECCAT_RODATA_MERGE_CONST:
4557 case SECCAT_SRODATA:
4564 /* We don't split these for the medium model. Place them into
4565 default sections and hope for the best. */
4570 /* We might get called with string constants, but get_named_section
4571 doesn't like them as they are not DECLs. Also, we need to set
4572 flags in that case. */
4574 return get_section (sname, flags, NULL);
4575 return get_named_section (decl, sname, reloc);
4578 return default_elf_select_section (decl, reloc, align);
4581 /* Build up a unique section name, expressed as a
4582 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4583 RELOC indicates whether the initial value of EXP requires
4584 link-time relocations. */
4586 static void ATTRIBUTE_UNUSED
4587 x86_64_elf_unique_section (tree decl, int reloc)
4589 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4590 && ix86_in_large_data_p (decl))
4592 const char *prefix = NULL;
4593 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4594 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4596 switch (categorize_decl_for_section (decl, reloc))
4599 case SECCAT_DATA_REL:
4600 case SECCAT_DATA_REL_LOCAL:
4601 case SECCAT_DATA_REL_RO:
4602 case SECCAT_DATA_REL_RO_LOCAL:
4603 prefix = one_only ? ".ld" : ".ldata";
4606 prefix = one_only ? ".lb" : ".lbss";
4609 case SECCAT_RODATA_MERGE_STR:
4610 case SECCAT_RODATA_MERGE_STR_INIT:
4611 case SECCAT_RODATA_MERGE_CONST:
4612 prefix = one_only ? ".lr" : ".lrodata";
4614 case SECCAT_SRODATA:
4621 /* We don't split these for the medium model. Place them into
4622 default sections and hope for the best. */
4627 const char *name, *linkonce;
4630 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4631 name = targetm.strip_name_encoding (name);
4633 /* If we're using one_only, then there needs to be a .gnu.linkonce
4634 prefix to the section name. */
4635 linkonce = one_only ? ".gnu.linkonce" : "";
4637 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4639 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4643 default_unique_section (decl, reloc);
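/* Illustrative outcome: with -mcmodel=medium, a large writable variable
   "buf" is placed in a section named ".ldata.buf"; if it is one_only and
   COMDAT groups are unavailable, the name becomes ".gnu.linkonce.ld.buf".  */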
4646 #ifdef COMMON_ASM_OP
4647 /* This says how to output assembler code to declare an
4648 uninitialized external linkage data object.
4650 For medium model x86-64 we need to use the .largecomm directive for large objects. */
4653 x86_elf_aligned_common (FILE *file,
4654 const char *name, unsigned HOST_WIDE_INT size,
4657 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4658 && size > (unsigned int)ix86_section_threshold)
4659 fputs (".largecomm\t", file);
4661 fputs (COMMON_ASM_OP, file);
4662 assemble_name (file, name);
4663 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4664 size, align / BITS_PER_UNIT);
4668 /* Utility function for targets to use in implementing
4669 ASM_OUTPUT_ALIGNED_BSS. */
4672 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4673 const char *name, unsigned HOST_WIDE_INT size,
4676 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4677 && size > (unsigned int)ix86_section_threshold)
4678 switch_to_section (get_named_section (decl, ".lbss", 0));
4680 switch_to_section (bss_section);
4681 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4682 #ifdef ASM_DECLARE_OBJECT_NAME
4683 last_assemble_variable_decl = decl;
4684 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4686 /* The standard thing is to just output a label for the object. */
4687 ASM_OUTPUT_LABEL (file, name);
4688 #endif /* ASM_DECLARE_OBJECT_NAME */
4689 ASM_OUTPUT_SKIP (file, size ? size : 1);
4692 /* Decide whether we must probe the stack before any space allocation
4693 on this target. It's essentially TARGET_STACK_PROBE except when
4694 -fstack-check causes the stack to be already probed differently. */
4697 ix86_target_stack_probe (void)
4699 /* Do not probe the stack twice if static stack checking is enabled. */
4700 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4703 return TARGET_STACK_PROBE;
4706 /* Decide whether we can make a sibling call to a function. DECL is the
4707 declaration of the function being targeted by the call and EXP is the
4708 CALL_EXPR representing the call. */
4711 ix86_function_ok_for_sibcall (tree decl, tree exp)
4713 tree type, decl_or_type;
4716 /* If we are generating position-independent code, we cannot sibcall
4717 optimize any indirect call, or a direct call to a global function,
4718 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4722 && (!decl || !targetm.binds_local_p (decl)))
4725 /* If we need to align the outgoing stack, then sibcalling would
4726 unalign the stack, which may break the called function. */
4727 if (ix86_minimum_incoming_stack_boundary (true)
4728 < PREFERRED_STACK_BOUNDARY)
4733 decl_or_type = decl;
4734 type = TREE_TYPE (decl);
4738 /* We're looking at the CALL_EXPR; we need the type of the function. */
4739 type = CALL_EXPR_FN (exp); /* pointer expression */
4740 type = TREE_TYPE (type); /* pointer type */
4741 type = TREE_TYPE (type); /* function type */
4742 decl_or_type = type;
4745 /* Check that the return value locations are the same. Like
4746 if we are returning floats on the 80387 register stack, we cannot
4747 make a sibcall from a function that doesn't return a float to a
4748 function that does or, conversely, from a function that does return
4749 a float to a function that doesn't; the necessary stack adjustment
4750 would not be executed. This is also the place we notice
4751 differences in the return value ABI. Note that it is ok for one
4752 of the functions to have void return type as long as the return
4753 value of the other is passed in a register. */
4754 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4755 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4757 if (STACK_REG_P (a) || STACK_REG_P (b))
4759 if (!rtx_equal_p (a, b))
4762 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4764 /* Disable sibcall if we need to generate vzeroupper after
4766 if (TARGET_VZEROUPPER
4767 && cfun->machine->callee_return_avx256_p
4768 && !cfun->machine->caller_return_avx256_p)
4771 else if (!rtx_equal_p (a, b))
4776 /* The SYSV ABI has more call-clobbered registers;
4777 disallow sibcalls from MS to SYSV. */
4778 if (cfun->machine->call_abi == MS_ABI
4779 && ix86_function_type_abi (type) == SYSV_ABI)
4784 /* If this call is indirect, we'll need to be able to use a
4785 call-clobbered register for the address of the target function.
4786 Make sure that all such registers are not used for passing
4787 parameters. Note that DLLIMPORT functions are indirect. */
4789 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4791 if (ix86_function_regparm (type, NULL) >= 3)
4793 /* ??? Need to count the actual number of registers to be used,
4794 not the possible number of registers. Fix later. */
4800 /* Otherwise okay. That also includes certain types of indirect calls. */
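/* Illustrative case of the return-value check above: on 32-bit x87, a
   double-returning function cannot tail-call a float-returning one; both
   values live in st(0), but the RTXes differ in mode, so rtx_equal_p
   fails and the sibcall is refused.  */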
4804 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4805 and "sseregparm" calling convention attributes;
4806 arguments as in struct attribute_spec.handler. */
4809 ix86_handle_cconv_attribute (tree *node, tree name,
4811 int flags ATTRIBUTE_UNUSED,
4814 if (TREE_CODE (*node) != FUNCTION_TYPE
4815 && TREE_CODE (*node) != METHOD_TYPE
4816 && TREE_CODE (*node) != FIELD_DECL
4817 && TREE_CODE (*node) != TYPE_DECL)
4819 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4821 *no_add_attrs = true;
4825 /* Can combine regparm with all attributes but fastcall and thiscall. */
4826 if (is_attribute_p ("regparm", name))
4830 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4832 error ("fastcall and regparm attributes are not compatible");
4835 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4837 error ("regparm and thiscall attributes are not compatible");
4840 cst = TREE_VALUE (args);
4841 if (TREE_CODE (cst) != INTEGER_CST)
4843 warning (OPT_Wattributes,
4844 "%qE attribute requires an integer constant argument",
4846 *no_add_attrs = true;
4848 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4850 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4852 *no_add_attrs = true;
4860 /* Do not warn when emulating the MS ABI. */
4861 if ((TREE_CODE (*node) != FUNCTION_TYPE
4862 && TREE_CODE (*node) != METHOD_TYPE)
4863 || ix86_function_type_abi (*node) != MS_ABI)
4864 warning (OPT_Wattributes, "%qE attribute ignored",
4866 *no_add_attrs = true;
4870 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4871 if (is_attribute_p ("fastcall", name))
4873 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4875 error ("fastcall and cdecl attributes are not compatible");
4877 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4879 error ("fastcall and stdcall attributes are not compatible");
4881 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4883 error ("fastcall and regparm attributes are not compatible");
4885 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4887 error ("fastcall and thiscall attributes are not compatible");
4891 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
4893 else if (is_attribute_p ("stdcall", name))
4895 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4897 error ("stdcall and cdecl attributes are not compatible");
4899 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4901 error ("stdcall and fastcall attributes are not compatible");
4903 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4905 error ("stdcall and thiscall attributes are not compatible");
4909 /* Can combine cdecl with regparm and sseregparm. */
4910 else if (is_attribute_p ("cdecl", name))
4912 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4914 error ("stdcall and cdecl attributes are not compatible");
4916 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4918 error ("fastcall and cdecl attributes are not compatible");
4920 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4922 error ("cdecl and thiscall attributes are not compatible");
4925 else if (is_attribute_p ("thiscall", name))
4927 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4928 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4930 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4932 error ("stdcall and thiscall attributes are not compatible");
4934 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4936 error ("fastcall and thiscall attributes are not compatible");
4938 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4940 error ("cdecl and thiscall attributes are not compatible");
4944 /* Can combine sseregparm with all attributes. */
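/* Illustrative uses of the conventions validated above (hedged; exact
   codegen depends on target flags): fastcall passes the first two
   integer arguments in %ecx/%edx, stdcall makes the callee pop its
   arguments, and regparm(N) passes up to N integer arguments in
   registers:

     int __attribute__ ((fastcall)) f (int a, int b);
     int __attribute__ ((stdcall)) g (int a);
     int __attribute__ ((regparm (2))) h (int a, int b);

   Incompatible mixes, e.g. fastcall with regparm, draw the errors
   above.  */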
4949 /* This function determines the calling convention from TYPE. */
4952 ix86_get_callcvt (const_tree type)
4954 unsigned int ret = 0;
4959 return IX86_CALLCVT_CDECL;
4961 attrs = TYPE_ATTRIBUTES (type);
4962 if (attrs != NULL_TREE)
4964 if (lookup_attribute ("cdecl", attrs))
4965 ret |= IX86_CALLCVT_CDECL;
4966 else if (lookup_attribute ("stdcall", attrs))
4967 ret |= IX86_CALLCVT_STDCALL;
4968 else if (lookup_attribute ("fastcall", attrs))
4969 ret |= IX86_CALLCVT_FASTCALL;
4970 else if (lookup_attribute ("thiscall", attrs))
4971 ret |= IX86_CALLCVT_THISCALL;
4973 /* Regparm isn't allowed for thiscall and fastcall. */
4974 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
4976 if (lookup_attribute ("regparm", attrs))
4977 ret |= IX86_CALLCVT_REGPARM;
4978 if (lookup_attribute ("sseregparm", attrs))
4979 ret |= IX86_CALLCVT_SSEREGPARM;
4982 if (IX86_BASE_CALLCVT(ret) != 0)
4986 is_stdarg = stdarg_p (type);
4987 if (TARGET_RTD && !is_stdarg)
4988 return IX86_CALLCVT_STDCALL | ret;
4992 || TREE_CODE (type) != METHOD_TYPE
4993 || ix86_function_type_abi (type) != MS_ABI)
4994 return IX86_CALLCVT_CDECL | ret;
4996 return IX86_CALLCVT_THISCALL;
4999 /* Return 0 if the attributes for two types are incompatible, 1 if they
5000 are compatible, and 2 if they are nearly compatible (which causes a
5001 warning to be generated). */
5004 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5006 unsigned int ccvt1, ccvt2;
5008 if (TREE_CODE (type1) != FUNCTION_TYPE
5009 && TREE_CODE (type1) != METHOD_TYPE)
5012 ccvt1 = ix86_get_callcvt (type1);
5013 ccvt2 = ix86_get_callcvt (type2);
5016 if (ix86_function_regparm (type1, NULL)
5017 != ix86_function_regparm (type2, NULL))
5023 /* Return the regparm value for a function with the indicated TYPE and DECL.
5024 DECL may be NULL when calling function indirectly
5025 or considering a libcall. */
5028 ix86_function_regparm (const_tree type, const_tree decl)
5035 return (ix86_function_type_abi (type) == SYSV_ABI
5036 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5037 ccvt = ix86_get_callcvt (type);
5038 regparm = ix86_regparm;
5040 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5042 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5045 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5049 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5051 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5054 /* Use register calling convention for local functions when possible. */
5056 && TREE_CODE (decl) == FUNCTION_DECL
5058 && !(profile_flag && !flag_fentry))
5060 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5061 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5062 if (i && i->local && i->can_change_signature)
5064 int local_regparm, globals = 0, regno;
5066 /* Make sure no regparm register is taken by a
5067 fixed register variable. */
5068 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5069 if (fixed_regs[local_regparm])
5072 /* We don't want to use regparm(3) for nested functions as
5073 these use a static chain pointer in the third argument. */
5074 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5077 /* In 32-bit mode save a register for the split stack. */
5078 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5081 /* Each fixed register usage increases register pressure,
5082 so fewer registers should be used for argument passing.
5083 This functionality can be overridden by an explicit regparm value. */
5085 for (regno = 0; regno <= DI_REG; regno++)
5086 if (fixed_regs[regno])
5090 = globals < local_regparm ? local_regparm - globals : 0;
5092 if (local_regparm > regparm)
5093 regparm = local_regparm;
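/* Illustrative effect: "__attribute__ ((regparm (3)))" on a 32-bit
   function moves its first three integer arguments into %eax, %edx and
   %ecx; the local-function path above may settle on fewer registers when
   fixed register variables shrink the pool.  */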
5100 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5101 DFmode (2) arguments in SSE registers for a function with the
5102 indicated TYPE and DECL. DECL may be NULL when calling function
5103 indirectly or considering a libcall. Otherwise return 0. */
5106 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5108 gcc_assert (!TARGET_64BIT);
5110 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5111 by the sseregparm attribute. */
5112 if (TARGET_SSEREGPARM
5113 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5120 error ("calling %qD with attribute sseregparm without "
5121 "SSE/SSE2 enabled", decl);
5123 error ("calling %qT with attribute sseregparm without "
5124 "SSE/SSE2 enabled", type);
5132 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5133 (and DFmode for SSE2) arguments in SSE registers. */
5134 if (decl && TARGET_SSE_MATH && optimize
5135 && !(profile_flag && !flag_fentry))
5137 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5138 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5139 if (i && i->local && i->can_change_signature)
5140 return TARGET_SSE2 ? 2 : 1;
5146 /* Return true if EAX is live at the start of the function. Used by
5147 ix86_expand_prologue to determine if we need special help before
5148 calling allocate_stack_worker. */
5151 ix86_eax_live_at_start_p (void)
5153 /* Cheat. Don't bother working forward from ix86_function_regparm
5154 to the function type to whether an actual argument is located in
5155 eax. Instead just look at cfg info, which is still close enough
5156 to correct at this point. This gives false positives for broken
5157 functions that might use uninitialized data that happens to be
5158 allocated in eax, but who cares? */
5159 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5163 ix86_keep_aggregate_return_pointer (tree fntype)
5169 attr = lookup_attribute ("callee_pop_aggregate_return",
5170 TYPE_ATTRIBUTES (fntype));
5172 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5174 /* For 32-bit MS-ABI the default is to keep the aggregate return pointer. */
5176 if (ix86_function_type_abi (fntype) == MS_ABI)
5179 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5182 /* Value is the number of bytes of arguments automatically
5183 popped when returning from a subroutine call.
5184 FUNDECL is the declaration node of the function (as a tree),
5185 FUNTYPE is the data type of the function (as a tree),
5186 or for a library call it is an identifier node for the subroutine name.
5187 SIZE is the number of bytes of arguments passed on the stack.
5189 On the 80386, the RTD insn may be used to pop them if the number
5190 of args is fixed, but if the number is variable then the caller
5191 must pop them all. RTD can't be used for library calls now
5192 because the library is compiled with the Unix compiler.
5193 Use of RTD is a selectable option, since it is incompatible with
5194 standard Unix calling sequences. If the option is not selected,
5195 the caller must always pop the args.
5197 The attribute stdcall is equivalent to RTD on a per module basis. */
5200 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5204 /* None of the 64-bit ABIs pop arguments. */
5208 ccvt = ix86_get_callcvt (funtype);
5210 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5211 | IX86_CALLCVT_THISCALL)) != 0
5212 && ! stdarg_p (funtype))
5215 /* Lose any fake structure return argument if it is passed on the stack. */
5216 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5217 && !ix86_keep_aggregate_return_pointer (funtype))
5219 int nregs = ix86_function_regparm (funtype, fundecl);
5221 return GET_MODE_SIZE (Pmode);
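/* Worked example: for a 32-bit "void __attribute__ ((stdcall)) f (int,
   int);", SIZE is 8, the convention includes IX86_CALLCVT_STDCALL and f
   is not stdarg, so this hook reports 8 bytes and the callee returns
   with "ret $8".  */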
5227 /* Argument support functions. */
5229 /* Return true when register may be used to pass function parameters. */
5231 ix86_function_arg_regno_p (int regno)
5234 const int *parm_regs;
5239 return (regno < REGPARM_MAX
5240 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5242 return (regno < REGPARM_MAX
5243 || (TARGET_MMX && MMX_REGNO_P (regno)
5244 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5245 || (TARGET_SSE && SSE_REGNO_P (regno)
5246 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5251 if (SSE_REGNO_P (regno) && TARGET_SSE)
5256 if (TARGET_SSE && SSE_REGNO_P (regno)
5257 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5261 /* TODO: The function should depend on current function ABI but
5262 builtins.c would need updating then. Therefore we use the default ABI. */
5265 /* RAX is used as hidden argument to va_arg functions. */
5266 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5269 if (ix86_abi == MS_ABI)
5270 parm_regs = x86_64_ms_abi_int_parameter_registers;
5272 parm_regs = x86_64_int_parameter_registers;
5273 for (i = 0; i < (ix86_abi == MS_ABI
5274 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5275 if (regno == parm_regs[i])
5280 /* Return true if we do not know how to pass TYPE solely in registers. */
5283 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5285 if (must_pass_in_stack_var_size_or_pad (mode, type))
5288 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5289 The layout_type routine is crafty and tries to trick us into passing
5290 currently unsupported vector types on the stack by using TImode. */
5291 return (!TARGET_64BIT && mode == TImode
5292 && type && TREE_CODE (type) != VECTOR_TYPE);
5295 /* Return the size, in bytes, of the area reserved for arguments passed
5296 in registers for the function represented by FNDECL, depending on the ABI used. */
5299 ix86_reg_parm_stack_space (const_tree fndecl)
5301 enum calling_abi call_abi = SYSV_ABI;
5302 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5303 call_abi = ix86_function_abi (fndecl);
5305 call_abi = ix86_function_type_abi (fndecl);
5306 if (TARGET_64BIT && call_abi == MS_ABI)
5311 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the call abi used. */
5314 ix86_function_type_abi (const_tree fntype)
5316 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5318 enum calling_abi abi = ix86_abi;
5319 if (abi == SYSV_ABI)
5321 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5324 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5332 ix86_function_ms_hook_prologue (const_tree fn)
5334 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5336 if (decl_function_context (fn) != NULL_TREE)
5337 error_at (DECL_SOURCE_LOCATION (fn),
5338 "ms_hook_prologue is not compatible with nested function");
5345 static enum calling_abi
5346 ix86_function_abi (const_tree fndecl)
5350 return ix86_function_type_abi (TREE_TYPE (fndecl));
5353 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the call abi used. */
5356 ix86_cfun_abi (void)
5360 return cfun->machine->call_abi;
5363 /* Write the extra assembler code needed to declare a function properly. */
5366 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5369 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5373 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5374 unsigned int filler_cc = 0xcccccccc;
5376 for (i = 0; i < filler_count; i += 4)
5377 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5380 #ifdef SUBTARGET_ASM_UNWIND_INIT
5381 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5384 ASM_OUTPUT_LABEL (asm_out_file, fname);
5386 /* Output magic byte marker, if hot-patch attribute is set. */
5391 /* leaq [%rsp + 0], %rsp */
5392 asm_fprintf (asm_out_file, ASM_BYTE
5393 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5397 /* movl.s %edi, %edi
5399 movl.s %esp, %ebp */
5400 asm_fprintf (asm_out_file, ASM_BYTE
5401 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5407 extern void init_regs (void);
5409 /* Implementation of the call ABI switching target hook. Sets up the call
5410 register sets specific to FNDECL. See also
5411 ix86_conditional_register_usage for more details. */
5413 ix86_call_abi_override (const_tree fndecl)
5415 if (fndecl == NULL_TREE)
5416 cfun->machine->call_abi = ix86_abi;
5418 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5421 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
5422 expensive re-initialization of init_regs each time we switch function context
5423 since this is needed only during RTL expansion. */
5425 ix86_maybe_switch_abi (void)
5428 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5432 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5433 for a call to a function whose data type is FNTYPE.
5434 For a library call, FNTYPE is 0. */
5437 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5438 tree fntype, /* tree ptr for function decl */
5439 rtx libname, /* SYMBOL_REF of library name or 0 */
5443 struct cgraph_local_info *i;
5446 memset (cum, 0, sizeof (*cum));
5448 /* Initialize for the current callee. */
5451 cfun->machine->callee_pass_avx256_p = false;
5452 cfun->machine->callee_return_avx256_p = false;
5457 i = cgraph_local_info (fndecl);
5458 cum->call_abi = ix86_function_abi (fndecl);
5459 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5464 cum->call_abi = ix86_function_type_abi (fntype);
5466 fnret_type = TREE_TYPE (fntype);
5471 if (TARGET_VZEROUPPER && fnret_type)
5473 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5475 if (function_pass_avx256_p (fnret_value))
5477 /* The return value of this function uses 256bit AVX modes. */
5479 cfun->machine->callee_return_avx256_p = true;
5481 cfun->machine->caller_return_avx256_p = true;
5485 cum->caller = caller;
5487 /* Set up the number of registers to use for passing arguments. */
5489 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5490 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5491 "or subtarget optimization implying it");
5492 cum->nregs = ix86_regparm;
5495 cum->nregs = (cum->call_abi == SYSV_ABI
5496 ? X86_64_REGPARM_MAX
5497 : X86_64_MS_REGPARM_MAX);
5501 cum->sse_nregs = SSE_REGPARM_MAX;
5504 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5505 ? X86_64_SSE_REGPARM_MAX
5506 : X86_64_MS_SSE_REGPARM_MAX);
5510 cum->mmx_nregs = MMX_REGPARM_MAX;
5511 cum->warn_avx = true;
5512 cum->warn_sse = true;
5513 cum->warn_mmx = true;
5515 /* Because the type might mismatch between caller and callee, we need to
5516 use the actual type of the function for local calls.
5517 FIXME: cgraph_analyze can be told to actually record if function uses
5518 va_start so for local functions maybe_vaarg can be made aggressive instead.
5520 FIXME: once the type system is fixed, we won't need this code anymore. */
5521 if (i && i->local && i->can_change_signature)
5522 fntype = TREE_TYPE (fndecl);
5523 cum->maybe_vaarg = (fntype
5524 ? (!prototype_p (fntype) || stdarg_p (fntype))
5529 /* If there are variable arguments, then we won't pass anything
5530 in registers in 32-bit mode. */
5531 if (stdarg_p (fntype))
5542 /* Use ecx and edx registers if function has fastcall attribute,
5543 else look for regparm information. */
5546 unsigned int ccvt = ix86_get_callcvt (fntype);
5547 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5550 cum->fastcall = 1; /* Same first register as in fastcall. */
5552 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5558 cum->nregs = ix86_function_regparm (fntype, fndecl);
5561 /* Set up the number of SSE registers used for passing SFmode
5562 and DFmode arguments. Warn for mismatching ABI. */
5563 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5567 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5568 But in the case of vector types, it is some vector mode.
5570 When we have only some of our vector ISA extensions enabled, there
5571 are some modes for which vector_mode_supported_p is false. For these
5572 modes, the generic vector support in gcc will choose some non-vector mode
5573 in order to implement the type. By computing the natural mode, we'll
5574 select the proper ABI location for the operand and not depend on whatever
5575 the middle-end decides to do with these vector types.
5577 The middle-end can't deal with vector types larger than 16 bytes. In this
5578 case, we return the original mode and warn of the ABI change if CUM isn't NULL. */
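/* For illustration (a hypothetical user type, not part of GCC): a type
   declared as

     typedef float v4sf __attribute__ ((vector_size (16)));

   has natural mode V4SFmode. Even when SSE is disabled and the
   middle-end lowers the type to some non-vector mode, computing the
   natural mode here keeps the argument in its proper psABI slot.  */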
5581 static enum machine_mode
5582 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5584 enum machine_mode mode = TYPE_MODE (type);
5586 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5588 HOST_WIDE_INT size = int_size_in_bytes (type);
5589 if ((size == 8 || size == 16 || size == 32)
5590 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5591 && TYPE_VECTOR_SUBPARTS (type) > 1)
5593 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5595 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5596 mode = MIN_MODE_VECTOR_FLOAT;
5598 mode = MIN_MODE_VECTOR_INT;
5600 /* Get the mode which has this inner mode and number of units. */
5601 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5602 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5603 && GET_MODE_INNER (mode) == innermode)
5605 if (size == 32 && !TARGET_AVX)
5607 static bool warnedavx;
5614 warning (0, "AVX vector argument without AVX "
5615 "enabled changes the ABI");
5617 return TYPE_MODE (type);
5630 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5631 this may not agree with the mode that the type system has chosen for the
5632 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5633 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5636 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5641 if (orig_mode != BLKmode)
5642 tmp = gen_rtx_REG (orig_mode, regno);
5645 tmp = gen_rtx_REG (mode, regno);
5646 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5647 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5653 /* x86-64 register passing implementation. See the x86-64 PS ABI for details.
5654 The goal of this code is to classify each eightbyte of the incoming argument
5655 by register class and assign registers accordingly. */
5657 /* Return the union class of CLASS1 and CLASS2.
5658 See the x86-64 PS ABI for details. */
5660 static enum x86_64_reg_class
5661 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5663 /* Rule #1: If both classes are equal, this is the resulting class. */
5664 if (class1 == class2)
5667 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5669 if (class1 == X86_64_NO_CLASS)
5671 if (class2 == X86_64_NO_CLASS)
5674 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5675 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5676 return X86_64_MEMORY_CLASS;
5678 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5679 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5680 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5681 return X86_64_INTEGERSI_CLASS;
5682 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5683 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5684 return X86_64_INTEGER_CLASS;
5686 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
MEMORY is used. */
5688 if (class1 == X86_64_X87_CLASS
5689 || class1 == X86_64_X87UP_CLASS
5690 || class1 == X86_64_COMPLEX_X87_CLASS
5691 || class2 == X86_64_X87_CLASS
5692 || class2 == X86_64_X87UP_CLASS
5693 || class2 == X86_64_COMPLEX_X87_CLASS)
5694 return X86_64_MEMORY_CLASS;
5696 /* Rule #6: Otherwise class SSE is used. */
5697 return X86_64_SSE_CLASS;
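/* A worked example of the rules above (illustrative only): in
   struct { int i; float f; } both fields share one eightbyte; the int
   classifies as INTEGERSI and the float, sitting at a 32-bit offset,
   as SSE. Rule #4 merges them to INTEGER, so the whole struct is
   passed in a single integer register.  */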
5700 /* Classify the argument of type TYPE and mode MODE.
5701 CLASSES will be filled by the register class used to pass each word
5702 of the operand. The number of words is returned. In case the parameter
5703 should be passed in memory, 0 is returned. As a special case for zero
5704 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5706 BIT_OFFSET is used internally for handling records and specifies the
5707 offset of the current field in bits, modulo 256, to avoid overflow cases.
5709 See the x86-64 PS ABI for details.
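/* Some sample classifications under these rules (for illustration,
   assuming the SysV x86-64 ABI):

     struct { double x, y; }   -> SSEDF, SSEDF      two XMM registers
     struct { long a, b; }     -> INTEGER, INTEGER  two GPRs
     long double               -> X87, X87UP        memory as an argument,
                                                    %st(0) as a return value
     struct { char c[17]; }    -> MEMORY            > 16 bytes, no SSE run
*/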
5713 classify_argument (enum machine_mode mode, const_tree type,
5714 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5716 HOST_WIDE_INT bytes =
5717 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5718 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5720 /* Variable sized entities are always passed/returned in memory. */
5724 if (mode != VOIDmode
5725 && targetm.calls.must_pass_in_stack (mode, type))
5728 if (type && AGGREGATE_TYPE_P (type))
5732 enum x86_64_reg_class subclasses[MAX_CLASSES];
5734 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5738 for (i = 0; i < words; i++)
5739 classes[i] = X86_64_NO_CLASS;
5741 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5742 signal the memory class, so handle them as a special case. */
5745 classes[0] = X86_64_NO_CLASS;
5749 /* Classify each field of record and merge classes. */
5750 switch (TREE_CODE (type))
5753 /* And now merge the fields of the structure. */
5754 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5756 if (TREE_CODE (field) == FIELD_DECL)
5760 if (TREE_TYPE (field) == error_mark_node)
5763 /* Bitfields are always classified as integer. Handle them
5764 early, since later code would consider them to be
5765 misaligned integers. */
5766 if (DECL_BIT_FIELD (field))
5768 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5769 i < ((int_bit_position (field) + (bit_offset % 64))
5770 + tree_low_cst (DECL_SIZE (field), 0)
5773 merge_classes (X86_64_INTEGER_CLASS,
5780 type = TREE_TYPE (field);
5782 /* Flexible array member is ignored. */
5783 if (TYPE_MODE (type) == BLKmode
5784 && TREE_CODE (type) == ARRAY_TYPE
5785 && TYPE_SIZE (type) == NULL_TREE
5786 && TYPE_DOMAIN (type) != NULL_TREE
5787 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5792 if (!warned && warn_psabi)
5795 inform (input_location,
5796 "the ABI of passing struct with"
5797 " a flexible array member has"
5798 " changed in GCC 4.4");
5802 num = classify_argument (TYPE_MODE (type), type,
5804 (int_bit_position (field)
5805 + bit_offset) % 256);
5808 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5809 for (i = 0; i < num && (i + pos) < words; i++)
5811 merge_classes (subclasses[i], classes[i + pos]);
5818 /* Arrays are handled as small records. */
5821 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5822 TREE_TYPE (type), subclasses, bit_offset);
5826 /* The partial classes are now full classes. */
5827 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5828 subclasses[0] = X86_64_SSE_CLASS;
5829 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5830 && !((bit_offset % 64) == 0 && bytes == 4))
5831 subclasses[0] = X86_64_INTEGER_CLASS;
5833 for (i = 0; i < words; i++)
5834 classes[i] = subclasses[i % num];
5839 case QUAL_UNION_TYPE:
5840 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5842 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5844 if (TREE_CODE (field) == FIELD_DECL)
5848 if (TREE_TYPE (field) == error_mark_node)
5851 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5852 TREE_TYPE (field), subclasses,
5856 for (i = 0; i < num; i++)
5857 classes[i] = merge_classes (subclasses[i], classes[i]);
5868 /* When size > 16 bytes, if the first one isn't
5869 X86_64_SSE_CLASS or any other ones aren't
5870 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5872 if (classes[0] != X86_64_SSE_CLASS)
5875 for (i = 1; i < words; i++)
5876 if (classes[i] != X86_64_SSEUP_CLASS)
5880 /* Final merger cleanup. */
5881 for (i = 0; i < words; i++)
5883 /* If one class is MEMORY, everything should be passed in memory. */
5885 if (classes[i] == X86_64_MEMORY_CLASS)
5888 /* The X86_64_SSEUP_CLASS should always be preceded by
5889 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5890 if (classes[i] == X86_64_SSEUP_CLASS
5891 && classes[i - 1] != X86_64_SSE_CLASS
5892 && classes[i - 1] != X86_64_SSEUP_CLASS)
5894 /* The first one should never be X86_64_SSEUP_CLASS. */
5895 gcc_assert (i != 0);
5896 classes[i] = X86_64_SSE_CLASS;
5899 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5900 everything should be passed in memory. */
5901 if (classes[i] == X86_64_X87UP_CLASS
5902 && (classes[i - 1] != X86_64_X87_CLASS))
5906 /* The first one should never be X86_64_X87UP_CLASS. */
5907 gcc_assert (i != 0);
5908 if (!warned && warn_psabi)
5911 inform (input_location,
5912 "the ABI of passing union with long double"
5913 " has changed in GCC 4.4");
5921 /* Compute the alignment needed. We align all types to their natural
5922 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5923 if (mode != VOIDmode && mode != BLKmode)
5925 int mode_alignment = GET_MODE_BITSIZE (mode);
5927 if (mode == XFmode)
5928 mode_alignment = 128;
5929 else if (mode == XCmode)
5930 mode_alignment = 256;
5931 if (COMPLEX_MODE_P (mode))
5932 mode_alignment /= 2;
5933 /* Misaligned fields are always returned in memory. */
5934 if (bit_offset % mode_alignment)
5938 /* For V1xx modes, just use the base mode. */
5939 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5940 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5941 mode = GET_MODE_INNER (mode);
5943 /* Classification of atomic types. */
5948 classes[0] = X86_64_SSE_CLASS;
5951 classes[0] = X86_64_SSE_CLASS;
5952 classes[1] = X86_64_SSEUP_CLASS;
5962 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5966 classes[0] = X86_64_INTEGERSI_CLASS;
5969 else if (size <= 64)
5971 classes[0] = X86_64_INTEGER_CLASS;
5974 else if (size <= 64+32)
5976 classes[0] = X86_64_INTEGER_CLASS;
5977 classes[1] = X86_64_INTEGERSI_CLASS;
5980 else if (size <= 64+64)
5982 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5990 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5994 /* OImode shouldn't be used directly. */
5999 if (!(bit_offset % 64))
6000 classes[0] = X86_64_SSESF_CLASS;
6002 classes[0] = X86_64_SSE_CLASS;
6005 classes[0] = X86_64_SSEDF_CLASS;
6008 classes[0] = X86_64_X87_CLASS;
6009 classes[1] = X86_64_X87UP_CLASS;
6012 classes[0] = X86_64_SSE_CLASS;
6013 classes[1] = X86_64_SSEUP_CLASS;
6016 classes[0] = X86_64_SSE_CLASS;
6017 if (!(bit_offset % 64))
6023 if (!warned && warn_psabi)
6026 inform (input_location,
6027 "the ABI of passing structure with complex float"
6028 " member has changed in GCC 4.4");
6030 classes[1] = X86_64_SSESF_CLASS;
6034 classes[0] = X86_64_SSEDF_CLASS;
6035 classes[1] = X86_64_SSEDF_CLASS;
6038 classes[0] = X86_64_COMPLEX_X87_CLASS;
6041 /* This mode is larger than 16 bytes. */
6049 classes[0] = X86_64_SSE_CLASS;
6050 classes[1] = X86_64_SSEUP_CLASS;
6051 classes[2] = X86_64_SSEUP_CLASS;
6052 classes[3] = X86_64_SSEUP_CLASS;
6060 classes[0] = X86_64_SSE_CLASS;
6061 classes[1] = X86_64_SSEUP_CLASS;
6069 classes[0] = X86_64_SSE_CLASS;
6075 gcc_assert (VECTOR_MODE_P (mode));
6080 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6082 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6083 classes[0] = X86_64_INTEGERSI_CLASS;
6085 classes[0] = X86_64_INTEGER_CLASS;
6086 classes[1] = X86_64_INTEGER_CLASS;
6087 return 1 + (bytes > 8);
6091 /* Examine the argument and set the number of registers required in each
6092 class. Return 0 iff the parameter should be passed in memory. */
6094 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6095 int *int_nregs, int *sse_nregs)
6097 enum x86_64_reg_class regclass[MAX_CLASSES];
6098 int n = classify_argument (mode, type, regclass, 0);
6104 for (n--; n >= 0; n--)
6105 switch (regclass[n])
6107 case X86_64_INTEGER_CLASS:
6108 case X86_64_INTEGERSI_CLASS:
6111 case X86_64_SSE_CLASS:
6112 case X86_64_SSESF_CLASS:
6113 case X86_64_SSEDF_CLASS:
6116 case X86_64_NO_CLASS:
6117 case X86_64_SSEUP_CLASS:
6119 case X86_64_X87_CLASS:
6120 case X86_64_X87UP_CLASS:
6124 case X86_64_COMPLEX_X87_CLASS:
6125 return in_return ? 2 : 0;
6126 case X86_64_MEMORY_CLASS:
6132 /* Construct container for the argument used by GCC interface. See
6133 FUNCTION_ARG for the detailed description. */
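/* For example (illustrative): for an argument of type
   struct { long l; double d; }, classified INTEGER + SSEDF above, the
   container built below is a PARALLEL pairing an integer register
   (e.g. (reg:DI rdi)) with offset 0 and (reg:DF xmm0) with offset 8.  */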
6136 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6137 const_tree type, int in_return, int nintregs, int nsseregs,
6138 const int *intreg, int sse_regno)
6140 /* The following variables hold the static issued_error state. */
6141 static bool issued_sse_arg_error;
6142 static bool issued_sse_ret_error;
6143 static bool issued_x87_ret_error;
6145 enum machine_mode tmpmode;
6146 int bytes =
6147 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6148 enum x86_64_reg_class regclass[MAX_CLASSES];
6152 int needed_sseregs, needed_intregs;
6153 rtx exp[MAX_CLASSES];
6156 n = classify_argument (mode, type, regclass, 0);
6159 if (!examine_argument (mode, type, in_return, &needed_intregs,
6162 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6165 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6166 some less clueful developer tries to use floating-point anyway. */
6167 if (needed_sseregs && !TARGET_SSE)
6171 if (!issued_sse_ret_error)
6173 error ("SSE register return with SSE disabled");
6174 issued_sse_ret_error = true;
6177 else if (!issued_sse_arg_error)
6179 error ("SSE register argument with SSE disabled");
6180 issued_sse_arg_error = true;
6185 /* Likewise, error if the ABI requires us to return values in the
6186 x87 registers and the user specified -mno-80387. */
6187 if (!TARGET_80387 && in_return)
6188 for (i = 0; i < n; i++)
6189 if (regclass[i] == X86_64_X87_CLASS
6190 || regclass[i] == X86_64_X87UP_CLASS
6191 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6193 if (!issued_x87_ret_error)
6195 error ("x87 register return with x87 disabled");
6196 issued_x87_ret_error = true;
6201 /* First construct the simple cases. Avoid SCmode, since we want to use a
6202 single register to pass this type. */
6203 if (n == 1 && mode != SCmode)
6204 switch (regclass[0])
6206 case X86_64_INTEGER_CLASS:
6207 case X86_64_INTEGERSI_CLASS:
6208 return gen_rtx_REG (mode, intreg[0]);
6209 case X86_64_SSE_CLASS:
6210 case X86_64_SSESF_CLASS:
6211 case X86_64_SSEDF_CLASS:
6212 if (mode != BLKmode)
6213 return gen_reg_or_parallel (mode, orig_mode,
6214 SSE_REGNO (sse_regno));
6216 case X86_64_X87_CLASS:
6217 case X86_64_COMPLEX_X87_CLASS:
6218 return gen_rtx_REG (mode, FIRST_STACK_REG);
6219 case X86_64_NO_CLASS:
6220 /* Zero sized array, struct or class. */
6225 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6226 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6227 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6229 && regclass[0] == X86_64_SSE_CLASS
6230 && regclass[1] == X86_64_SSEUP_CLASS
6231 && regclass[2] == X86_64_SSEUP_CLASS
6232 && regclass[3] == X86_64_SSEUP_CLASS
6234 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6237 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6238 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6239 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6240 && regclass[1] == X86_64_INTEGER_CLASS
6241 && (mode == CDImode || mode == TImode || mode == TFmode)
6242 && intreg[0] + 1 == intreg[1])
6243 return gen_rtx_REG (mode, intreg[0]);
6245 /* Otherwise figure out the entries of the PARALLEL. */
6246 for (i = 0; i < n; i++)
6250 switch (regclass[i])
6252 case X86_64_NO_CLASS:
6254 case X86_64_INTEGER_CLASS:
6255 case X86_64_INTEGERSI_CLASS:
6256 /* Merge TImodes on aligned occasions here too. */
6257 if (i * 8 + 8 > bytes)
6258 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6259 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6263 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6264 if (tmpmode == BLKmode)
6266 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6267 gen_rtx_REG (tmpmode, *intreg),
6271 case X86_64_SSESF_CLASS:
6272 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6273 gen_rtx_REG (SFmode,
6274 SSE_REGNO (sse_regno)),
6278 case X86_64_SSEDF_CLASS:
6279 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6280 gen_rtx_REG (DFmode,
6281 SSE_REGNO (sse_regno)),
6285 case X86_64_SSE_CLASS:
6293 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6303 && regclass[1] == X86_64_SSEUP_CLASS
6304 && regclass[2] == X86_64_SSEUP_CLASS
6305 && regclass[3] == X86_64_SSEUP_CLASS);
6312 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6313 gen_rtx_REG (tmpmode,
6314 SSE_REGNO (sse_regno)),
6323 /* Empty aligned struct, union or class. */
6327 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6328 for (i = 0; i < nexps; i++)
6329 XVECEXP (ret, 0, i) = exp [i];
6333 /* Update the data in CUM to advance over an argument of mode MODE
6334 and data type TYPE. (TYPE is null for libcalls where that information
6335 may not be available.) */
6338 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6339 const_tree type, HOST_WIDE_INT bytes,
6340 HOST_WIDE_INT words)
6356 cum->words += words;
6357 cum->nregs -= words;
6358 cum->regno += words;
6360 if (cum->nregs <= 0)
6368 /* OImode shouldn't be used directly. */
6372 if (cum->float_in_sse < 2)
6375 if (cum->float_in_sse < 1)
6392 if (!type || !AGGREGATE_TYPE_P (type))
6394 cum->sse_words += words;
6395 cum->sse_nregs -= 1;
6396 cum->sse_regno += 1;
6397 if (cum->sse_nregs <= 0)
6411 if (!type || !AGGREGATE_TYPE_P (type))
6413 cum->mmx_words += words;
6414 cum->mmx_nregs -= 1;
6415 cum->mmx_regno += 1;
6416 if (cum->mmx_nregs <= 0)
6427 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6428 const_tree type, HOST_WIDE_INT words, bool named)
6430 int int_nregs, sse_nregs;
6432 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6433 if (!named && VALID_AVX256_REG_MODE (mode))
6436 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6437 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6439 cum->nregs -= int_nregs;
6440 cum->sse_nregs -= sse_nregs;
6441 cum->regno += int_nregs;
6442 cum->sse_regno += sse_nregs;
6446 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6447 cum->words = (cum->words + align - 1) & ~(align - 1);
6448 cum->words += words;
6453 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6454 HOST_WIDE_INT words)
6456 /* Otherwise, this should be passed indirectly. */
6457 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6459 cum->words += words;
6467 /* Update the data in CUM to advance over an argument of mode MODE and
6468 data type TYPE. (TYPE is null for libcalls where that information
6469 may not be available.) */
6472 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6473 const_tree type, bool named)
6475 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6476 HOST_WIDE_INT bytes, words;
6478 if (mode == BLKmode)
6479 bytes = int_size_in_bytes (type);
6481 bytes = GET_MODE_SIZE (mode);
6482 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6485 mode = type_natural_mode (type, NULL);
6487 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6488 function_arg_advance_ms_64 (cum, bytes, words);
6489 else if (TARGET_64BIT)
6490 function_arg_advance_64 (cum, mode, type, words, named);
6492 function_arg_advance_32 (cum, mode, type, bytes, words);
6495 /* Define where to put the arguments to a function.
6496 Value is zero to push the argument on the stack,
6497 or a hard register in which to store the argument.
6499 MODE is the argument's machine mode.
6500 TYPE is the data type of the argument (as a tree).
6501 This is null for libcalls where that information may not be available.
6503 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6504 the preceding args and about the function being called.
6505 NAMED is nonzero if this argument is a named parameter
6506 (otherwise it is an extra parameter matching an ellipsis). */
6509 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6510 enum machine_mode orig_mode, const_tree type,
6511 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6513 static bool warnedsse, warnedmmx;
6515 /* Avoid the AL settings for the Unix64 ABI. */
6516 if (mode == VOIDmode)
6532 if (words <= cum->nregs)
6534 int regno = cum->regno;
6536 /* Fastcall allocates the first two DWORD (SImode) or
6537 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6543 || (type && AGGREGATE_TYPE_P (type)))
6546 /* ECX, not EAX, is the first allocated register. */
6547 if (regno == AX_REG)
6550 return gen_rtx_REG (mode, regno);
6555 if (cum->float_in_sse < 2)
6558 if (cum->float_in_sse < 1)
6562 /* In 32-bit mode, we pass TImode in xmm registers. */
6569 if (!type || !AGGREGATE_TYPE_P (type))
6571 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6574 warning (0, "SSE vector argument without SSE enabled "
6578 return gen_reg_or_parallel (mode, orig_mode,
6579 cum->sse_regno + FIRST_SSE_REG);
6584 /* OImode shouldn't be used directly. */
6593 if (!type || !AGGREGATE_TYPE_P (type))
6596 return gen_reg_or_parallel (mode, orig_mode,
6597 cum->sse_regno + FIRST_SSE_REG);
6607 if (!type || !AGGREGATE_TYPE_P (type))
6609 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6612 warning (0, "MMX vector argument without MMX enabled "
6616 return gen_reg_or_parallel (mode, orig_mode,
6617 cum->mmx_regno + FIRST_MMX_REG);
6626 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6627 enum machine_mode orig_mode, const_tree type, bool named)
6629 /* Handle a hidden AL argument containing the number of vector
6630 registers used by varargs x86-64 functions. */
6631 if (mode == VOIDmode)
6632 return GEN_INT (cum->maybe_vaarg
6633 ? (cum->sse_nregs < 0
6634 ? X86_64_SSE_REGPARM_MAX
6649 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6655 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6657 &x86_64_int_parameter_registers [cum->regno],
6662 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6663 enum machine_mode orig_mode, bool named,
6664 HOST_WIDE_INT bytes)
6668 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6669 We use the value -2 to specify that the current function call is MS ABI. */
6670 if (mode == VOIDmode)
6671 return GEN_INT (-2);
6673 /* If we've run out of registers, it goes on the stack. */
6674 if (cum->nregs == 0)
6677 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6679 /* Only floating point modes are passed in anything but integer regs. */
6680 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6683 regno = cum->regno + FIRST_SSE_REG;
6688 /* Unnamed floating parameters are passed in both the
6689 SSE and integer registers. */
6690 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6691 t2 = gen_rtx_REG (mode, regno);
6692 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6693 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6694 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
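/* E.g. (illustrative): in a varargs call such as printf ("%f", d),
   the double sits in argument slot 2, so the PARALLEL above makes the
   caller load it into both XMM1 and RDX; the callee's va_arg can then
   read it back from the integer register save area without knowing
   its type.  */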
6697 /* Handle aggregate types passed in registers. */
6698 if (orig_mode == BLKmode)
6700 if (bytes > 0 && bytes <= 8)
6701 mode = (bytes > 4 ? DImode : SImode);
6702 if (mode == BLKmode)
6706 return gen_reg_or_parallel (mode, orig_mode, regno);
6709 /* Return where to put the arguments to a function.
6710 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6712 MODE is the argument's machine mode. TYPE is the data type of the
6713 argument. It is null for libcalls where that information may not be
6714 available. CUM gives information about the preceding args and about
6715 the function being called. NAMED is nonzero if this argument is a
6716 named parameter (otherwise it is an extra parameter matching an
6720 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6721 const_tree type, bool named)
6723 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6724 enum machine_mode mode = omode;
6725 HOST_WIDE_INT bytes, words;
6728 if (mode == BLKmode)
6729 bytes = int_size_in_bytes (type);
6731 bytes = GET_MODE_SIZE (mode);
6732 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6734 /* To simplify the code below, represent vector types with a vector mode
6735 even if MMX/SSE are not active. */
6736 if (type && TREE_CODE (type) == VECTOR_TYPE)
6737 mode = type_natural_mode (type, cum);
6739 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6740 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6741 else if (TARGET_64BIT)
6742 arg = function_arg_64 (cum, mode, omode, type, named);
6744 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6746 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6748 /* This argument uses 256bit AVX modes. */
6750 cfun->machine->callee_pass_avx256_p = true;
6752 cfun->machine->caller_pass_avx256_p = true;
6758 /* A C expression that indicates when an argument must be passed by
6759 reference. If nonzero for an argument, a copy of that argument is
6760 made in memory and a pointer to the argument is passed instead of
6761 the argument itself. The pointer is passed in whatever way is
6762 appropriate for passing a pointer to that type. */
6765 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6766 enum machine_mode mode ATTRIBUTE_UNUSED,
6767 const_tree type, bool named ATTRIBUTE_UNUSED)
6769 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6771 /* See Windows x64 Software Convention. */
6772 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6774 int msize = (int) GET_MODE_SIZE (mode);
6777 /* Arrays are passed by reference. */
6778 if (TREE_CODE (type) == ARRAY_TYPE)
6781 if (AGGREGATE_TYPE_P (type))
6783 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6784 are passed by reference. */
6785 msize = int_size_in_bytes (type);
6789 /* __m128 is passed by reference. */
6791 case 1: case 2: case 4: case 8:
6797 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6803 /* Return true when TYPE should be 128bit aligned for 32bit argument
6804 passing ABI. XXX: This function is obsolete and is only used for
6805 checking psABI compatibility with previous versions of GCC. */
6808 ix86_compat_aligned_value_p (const_tree type)
6810 enum machine_mode mode = TYPE_MODE (type);
6811 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6815 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6817 if (TYPE_ALIGN (type) < 128)
6820 if (AGGREGATE_TYPE_P (type))
6822 /* Walk the aggregates recursively. */
6823 switch (TREE_CODE (type))
6827 case QUAL_UNION_TYPE:
6831 /* Walk all the structure fields. */
6832 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6834 if (TREE_CODE (field) == FIELD_DECL
6835 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6842 /* Just for use if some languages pass arrays by value. */
6843 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6854 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6855 XXX: This function is obsolete and is only used for checking psABI
6856 compatibility with previous versions of GCC. */
6859 ix86_compat_function_arg_boundary (enum machine_mode mode,
6860 const_tree type, unsigned int align)
6862 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6863 natural boundaries. */
6864 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6866 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6867 make an exception for SSE modes since these require 128bit alignment.
6870 The handling here differs from field_alignment. ICC aligns MMX
6871 arguments to 4 byte boundaries, while structure fields are aligned
6872 to 8 byte boundaries. */
6875 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6876 align = PARM_BOUNDARY;
6880 if (!ix86_compat_aligned_value_p (type))
6881 align = PARM_BOUNDARY;
6884 if (align > BIGGEST_ALIGNMENT)
6885 align = BIGGEST_ALIGNMENT;
6889 /* Return true when TYPE should be 128bit aligned for 32bit argument passing ABI. */
6893 ix86_contains_aligned_value_p (const_tree type)
6895 enum machine_mode mode = TYPE_MODE (type);
6897 if (mode == XFmode || mode == XCmode)
6900 if (TYPE_ALIGN (type) < 128)
6903 if (AGGREGATE_TYPE_P (type))
6905 /* Walk the aggregates recursively. */
6906 switch (TREE_CODE (type))
6910 case QUAL_UNION_TYPE:
6914 /* Walk all the structure fields. */
6915 for (field = TYPE_FIELDS (type);
6917 field = DECL_CHAIN (field))
6919 if (TREE_CODE (field) == FIELD_DECL
6920 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
6927 /* Just for use if some languages pass arrays by value. */
6928 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
6937 return TYPE_ALIGN (type) >= 128;
6942 /* Gives the alignment boundary, in bits, of an argument with the
6943 specified mode and type. */
6946 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6951 /* Since the main variant type is used for the call, we convert the
6952 given type to its main variant. */
6953 type = TYPE_MAIN_VARIANT (type);
6954 align = TYPE_ALIGN (type);
6957 align = GET_MODE_ALIGNMENT (mode);
6958 if (align < PARM_BOUNDARY)
6959 align = PARM_BOUNDARY;
6963 unsigned int saved_align = align;
6967 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
6970 if (mode == XFmode || mode == XCmode)
6971 align = PARM_BOUNDARY;
6973 else if (!ix86_contains_aligned_value_p (type))
6974 align = PARM_BOUNDARY;
6977 align = PARM_BOUNDARY;
6982 && align != ix86_compat_function_arg_boundary (mode, type,
6986 inform (input_location,
6987 "The ABI for passing parameters with %d-byte"
6988 " alignment has changed in GCC 4.6",
6989 align / BITS_PER_UNIT);
6996 /* Return true if N is a possible register number for a function value. */
6999 ix86_function_value_regno_p (const unsigned int regno)
7006 case FIRST_FLOAT_REG:
7007 /* TODO: The function should depend on the current function ABI, but
7008 builtins.c would need updating then. Therefore we use the default ABI. */
7010 if (TARGET_64BIT && ix86_abi == MS_ABI)
7012 return TARGET_FLOAT_RETURNS_IN_80387;
7018 if (TARGET_MACHO || TARGET_64BIT)
7026 /* Define how to find the value returned by a function.
7027 VALTYPE is the data type of the value (as a tree).
7028 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7029 otherwise, FUNC is 0. */
7032 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7033 const_tree fntype, const_tree fn)
7037 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7038 we normally prevent this case when mmx is not available. However
7039 some ABIs may require the result to be returned like DImode. */
7040 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7041 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7043 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7044 we prevent this case when sse is not available. However some ABIs
7045 may require the result to be returned like integer TImode. */
7046 else if (mode == TImode
7047 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7048 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7050 /* 32-byte vector modes in %ymm0. */
7051 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7052 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7054 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7055 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7056 regno = FIRST_FLOAT_REG;
7058 /* Most things go in %eax. */
7061 /* Override FP return register with %xmm0 for local functions when
7062 SSE math is enabled or for functions with sseregparm attribute. */
7063 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7065 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7066 if ((sse_level >= 1 && mode == SFmode)
7067 || (sse_level == 2 && mode == DFmode))
7068 regno = FIRST_SSE_REG;
7071 /* OImode shouldn't be used directly. */
7072 gcc_assert (mode != OImode);
7074 return gen_rtx_REG (orig_mode, regno);
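/* Concretely (an illustrative case): a plain "float f (void)" returns
   in %st(0) under the default 32-bit ABI, while a local function
   compiled with SSE math enabled (or one carrying the sseregparm
   attribute) takes the override above and returns in %xmm0.  */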
7078 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7083 /* Handle libcalls, which don't provide a type node. */
7084 if (valtype == NULL)
7096 return gen_rtx_REG (mode, FIRST_SSE_REG);
7099 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7103 return gen_rtx_REG (mode, AX_REG);
7106 else if (POINTER_TYPE_P (valtype))
7108 /* Pointers are always returned in Pmode. */
7112 ret = construct_container (mode, orig_mode, valtype, 1,
7113 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7114 x86_64_int_return_registers, 0);
7116 /* For zero sized structures, construct_container returns NULL, but we
7117 need to keep the rest of the compiler happy by returning a meaningful value. */
7119 ret = gen_rtx_REG (orig_mode, AX_REG);
7125 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7127 unsigned int regno = AX_REG;
7131 switch (GET_MODE_SIZE (mode))
7134 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7135 && !COMPLEX_MODE_P (mode))
7136 regno = FIRST_SSE_REG;
7140 if (mode == SFmode || mode == DFmode)
7141 regno = FIRST_SSE_REG;
7147 return gen_rtx_REG (orig_mode, regno);
7151 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7152 enum machine_mode orig_mode, enum machine_mode mode)
7154 const_tree fn, fntype;
7157 if (fntype_or_decl && DECL_P (fntype_or_decl))
7158 fn = fntype_or_decl;
7159 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7161 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7162 return function_value_ms_64 (orig_mode, mode);
7163 else if (TARGET_64BIT)
7164 return function_value_64 (orig_mode, mode, valtype);
7166 return function_value_32 (orig_mode, mode, fntype, fn);
7170 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7171 bool outgoing ATTRIBUTE_UNUSED)
7173 enum machine_mode mode, orig_mode;
7175 orig_mode = TYPE_MODE (valtype);
7176 mode = type_natural_mode (valtype, NULL);
7177 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7180 /* Pointer function arguments and return values are promoted to Pmode. */
7182 static enum machine_mode
7183 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7184 int *punsignedp, const_tree fntype,
7187 if (type != NULL_TREE && POINTER_TYPE_P (type))
7189 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7192 return default_promote_function_mode (type, mode, punsignedp, fntype,
7197 ix86_libcall_value (enum machine_mode mode)
7199 return ix86_function_value_1 (NULL, NULL, mode, mode);
7202 /* Return true iff type is returned in memory. */
7204 static bool ATTRIBUTE_UNUSED
7205 return_in_memory_32 (const_tree type, enum machine_mode mode)
7209 if (mode == BLKmode)
7212 size = int_size_in_bytes (type);
7214 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7217 if (VECTOR_MODE_P (mode) || mode == TImode)
7219 /* User-created vectors small enough to fit in EAX. */
7223 /* MMX/3dNow values are returned in MM0,
7224 except when it doesn't exist or the ABI prescribes otherwise. */
7226 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7228 /* SSE values are returned in XMM0, except when it doesn't exist. */
7232 /* AVX values are returned in YMM0, except when it doesn't exist. */
7243 /* OImode shouldn't be used directly. */
7244 gcc_assert (mode != OImode);
7249 static bool ATTRIBUTE_UNUSED
7250 return_in_memory_64 (const_tree type, enum machine_mode mode)
7252 int needed_intregs, needed_sseregs;
7253 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7256 static bool ATTRIBUTE_UNUSED
7257 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7259 HOST_WIDE_INT size = int_size_in_bytes (type);
7261 /* __m128 is returned in xmm0. */
7262 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7263 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7266 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
7267 return size != 1 && size != 2 && size != 4 && size != 8;
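/* E.g. (illustrative): struct { char c[3]; } has size 3, so under the
   MS ABI it is returned in memory through a hidden pointer, whereas
   struct { int a, b; } (size 8) comes back in RAX.  */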
7271 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7273 #ifdef SUBTARGET_RETURN_IN_MEMORY
7274 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7276 const enum machine_mode mode = type_natural_mode (type, NULL);
7280 if (ix86_function_type_abi (fntype) == MS_ABI)
7281 return return_in_memory_ms_64 (type, mode);
7283 return return_in_memory_64 (type, mode);
7286 return return_in_memory_32 (type, mode);
7290 /* When returning SSE vector types, we have a choice of either
7291 (1) being ABI incompatible with a -march switch, or
7292 (2) generating an error.
7293 Given no good solution, I think the safest thing is one warning.
7294 The user won't be able to use -Werror, but....
7296 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7297 called in response to actually generating a caller or callee that
7298 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7299 via aggregate_value_p for general type probing from tree-ssa. */
7302 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7304 static bool warnedsse, warnedmmx;
7306 if (!TARGET_64BIT && type)
7308 /* Look at the return type of the function, not the function type. */
7309 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7311 if (!TARGET_SSE && !warnedsse)
7314 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7317 warning (0, "SSE vector return without SSE enabled "
7322 if (!TARGET_MMX && !warnedmmx)
7324 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7327 warning (0, "MMX vector return without MMX enabled "
7337 /* Create the va_list data type. */
7339 /* Returns the calling-convention-specific va_list data type.
7340 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7343 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7345 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7347 /* For i386 we use a plain pointer to the argument area. */
7348 if (!TARGET_64BIT || abi == MS_ABI)
7349 return build_pointer_type (char_type_node);
7351 record = lang_hooks.types.make_type (RECORD_TYPE);
7352 type_decl = build_decl (BUILTINS_LOCATION,
7353 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7355 f_gpr = build_decl (BUILTINS_LOCATION,
7356 FIELD_DECL, get_identifier ("gp_offset"),
7357 unsigned_type_node);
7358 f_fpr = build_decl (BUILTINS_LOCATION,
7359 FIELD_DECL, get_identifier ("fp_offset"),
7360 unsigned_type_node);
7361 f_ovf = build_decl (BUILTINS_LOCATION,
7362 FIELD_DECL, get_identifier ("overflow_arg_area"),
7364 f_sav = build_decl (BUILTINS_LOCATION,
7365 FIELD_DECL, get_identifier ("reg_save_area"),
7368 va_list_gpr_counter_field = f_gpr;
7369 va_list_fpr_counter_field = f_fpr;
7371 DECL_FIELD_CONTEXT (f_gpr) = record;
7372 DECL_FIELD_CONTEXT (f_fpr) = record;
7373 DECL_FIELD_CONTEXT (f_ovf) = record;
7374 DECL_FIELD_CONTEXT (f_sav) = record;
7376 TYPE_STUB_DECL (record) = type_decl;
7377 TYPE_NAME (record) = type_decl;
7378 TYPE_FIELDS (record) = f_gpr;
7379 DECL_CHAIN (f_gpr) = f_fpr;
7380 DECL_CHAIN (f_fpr) = f_ovf;
7381 DECL_CHAIN (f_ovf) = f_sav;
7383 layout_type (record);
7385 /* The correct type is an array type of one element. */
7386 return build_array_type (record, build_index_type (size_zero_node));
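/* The record built above corresponds to the psABI declaration (shown
   for illustration; the compiler constructs it as trees):

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];
*/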
7389 /* Set up the builtin va_list data type and, for 64-bit, the additional
7390 calling-convention-specific va_list data types. */
7393 ix86_build_builtin_va_list (void)
7395 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7397 /* Initialize the ABI-specific va_list builtin types. */
7401 if (ix86_abi == MS_ABI)
7403 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7404 if (TREE_CODE (t) != RECORD_TYPE)
7405 t = build_variant_type_copy (t);
7406 sysv_va_list_type_node = t;
7411 if (TREE_CODE (t) != RECORD_TYPE)
7412 t = build_variant_type_copy (t);
7413 sysv_va_list_type_node = t;
7415 if (ix86_abi != MS_ABI)
7417 t = ix86_build_builtin_va_list_abi (MS_ABI);
7418 if (TREE_CODE (t) != RECORD_TYPE)
7419 t = build_variant_type_copy (t);
7420 ms_va_list_type_node = t;
7425 if (TREE_CODE (t) != RECORD_TYPE)
7426 t = build_variant_type_copy (t);
7427 ms_va_list_type_node = t;
7434 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7437 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7443 /* GPR size of varargs save area. */
7444 if (cfun->va_list_gpr_size)
7445 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7447 ix86_varargs_gpr_size = 0;
7449 /* FPR size of varargs save area. We don't need it if we don't pass
7450 anything in SSE registers. */
7451 if (TARGET_SSE && cfun->va_list_fpr_size)
7452 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7454 ix86_varargs_fpr_size = 0;
7456 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7459 save_area = frame_pointer_rtx;
7460 set = get_varargs_alias_set ();
7462 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7463 if (max > X86_64_REGPARM_MAX)
7464 max = X86_64_REGPARM_MAX;
7466 for (i = cum->regno; i < max; i++)
7468 mem = gen_rtx_MEM (Pmode,
7469 plus_constant (save_area, i * UNITS_PER_WORD));
7470 MEM_NOTRAP_P (mem) = 1;
7471 set_mem_alias_set (mem, set);
7472 emit_move_insn (mem, gen_rtx_REG (Pmode,
7473 x86_64_int_parameter_registers[i]));
7476 if (ix86_varargs_fpr_size)
7478 enum machine_mode smode;
7481 /* Now emit code to save SSE registers. The AX parameter contains the
7482 number of SSE parameter registers used to call this function, though
7483 all we actually check here is the zero/non-zero status. */
7485 label = gen_label_rtx ();
7486 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7487 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7490 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7491 we used movdqa (i.e. TImode) instead? Perhaps even better would
7492 be if we could determine the real mode of the data, via a hook
7493 into pass_stdarg. Ignore all that for now. */
7495 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7496 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7498 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7499 if (max > X86_64_SSE_REGPARM_MAX)
7500 max = X86_64_SSE_REGPARM_MAX;
7502 for (i = cum->sse_regno; i < max; ++i)
7504 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7505 mem = gen_rtx_MEM (smode, mem);
7506 MEM_NOTRAP_P (mem) = 1;
7507 set_mem_alias_set (mem, set);
7508 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7510 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7518 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7520 alias_set_type set = get_varargs_alias_set ();
7523 /* Reset to zero, as there might be a sysv vaarg used before. */
7525 ix86_varargs_gpr_size = 0;
7526 ix86_varargs_fpr_size = 0;
7528 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7532 mem = gen_rtx_MEM (Pmode,
7533 plus_constant (virtual_incoming_args_rtx,
7534 i * UNITS_PER_WORD));
7535 MEM_NOTRAP_P (mem) = 1;
7536 set_mem_alias_set (mem, set);
7538 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7539 emit_move_insn (mem, reg);
7544 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7545 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7548 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7549 CUMULATIVE_ARGS next_cum;
7552 /* This argument doesn't appear to be used anymore. Which is good,
7553 because the old code here didn't suppress rtl generation. */
7554 gcc_assert (!no_rtl);
7559 fntype = TREE_TYPE (current_function_decl);
7561 /* For varargs, we do not want to skip the dummy va_dcl argument.
7562 For stdargs, we do want to skip the last named argument. */
7564 if (stdarg_p (fntype))
7565 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7568 if (cum->call_abi == MS_ABI)
7569 setup_incoming_varargs_ms_64 (&next_cum);
7571 setup_incoming_varargs_64 (&next_cum);
7574 /* Checks if TYPE is of kind va_list char *. */
7577 is_va_list_char_pointer (tree type)
7581 /* For 32-bit it is always true. */
7584 canonic = ix86_canonical_va_list_type (type);
7585 return (canonic == ms_va_list_type_node
7586 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7589 /* Implement va_start. */
7592 ix86_va_start (tree valist, rtx nextarg)
7594 HOST_WIDE_INT words, n_gpr, n_fpr;
7595 tree f_gpr, f_fpr, f_ovf, f_sav;
7596 tree gpr, fpr, ovf, sav, t;
7600 if (flag_split_stack
7601 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7603 unsigned int scratch_regno;
7605 /* When we are splitting the stack, we can't refer to the stack
7606 arguments using internal_arg_pointer, because they may be on
7607 the old stack. The split stack prologue will arrange to
7608 leave a pointer to the old stack arguments in a scratch
7609 register, which we here copy to a pseudo-register. The split
7610 stack prologue can't set the pseudo-register directly because
7611 it (the prologue) runs before any registers have been saved. */
7613 scratch_regno = split_stack_prologue_scratch_regno ();
7614 if (scratch_regno != INVALID_REGNUM)
7618 reg = gen_reg_rtx (Pmode);
7619 cfun->machine->split_stack_varargs_pointer = reg;
7622 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7626 push_topmost_sequence ();
7627 emit_insn_after (seq, entry_of_function ());
7628 pop_topmost_sequence ();
7632 /* Only the 64-bit target needs something special. */
7633 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7635 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7636 std_expand_builtin_va_start (valist, nextarg);
7641 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7642 next = expand_binop (ptr_mode, add_optab,
7643 cfun->machine->split_stack_varargs_pointer,
7644 crtl->args.arg_offset_rtx,
7645 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7646 convert_move (va_r, next, 0);
7651 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7652 f_fpr = DECL_CHAIN (f_gpr);
7653 f_ovf = DECL_CHAIN (f_fpr);
7654 f_sav = DECL_CHAIN (f_ovf);
7656 valist = build_simple_mem_ref (valist);
7657 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7658 /* The following should be folded into the MEM_REF offset. */
7659 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7661 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7663 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7665 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7668 /* Count number of gp and fp argument registers used. */
7669 words = crtl->args.info.words;
7670 n_gpr = crtl->args.info.regno;
7671 n_fpr = crtl->args.info.sse_regno;
7673 if (cfun->va_list_gpr_size)
7675 type = TREE_TYPE (gpr);
7676 t = build2 (MODIFY_EXPR, type,
7677 gpr, build_int_cst (type, n_gpr * 8));
7678 TREE_SIDE_EFFECTS (t) = 1;
7679 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7682 if (TARGET_SSE && cfun->va_list_fpr_size)
7684 type = TREE_TYPE (fpr);
7685 t = build2 (MODIFY_EXPR, type, fpr,
7686 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7687 TREE_SIDE_EFFECTS (t) = 1;
7688 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7691 /* Find the overflow area. */
7692 type = TREE_TYPE (ovf);
7693 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7694 ovf_rtx = crtl->args.internal_arg_pointer;
7696 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7697 t = make_tree (type, ovf_rtx);
7699 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7700 t = build2 (MODIFY_EXPR, type, ovf, t);
7701 TREE_SIDE_EFFECTS (t) = 1;
7702 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7704 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7706 /* Find the register save area.
7707 The function prologue saves it right above the stack frame. */
7708 type = TREE_TYPE (sav);
7709 t = make_tree (type, frame_pointer_rtx);
7710 if (!ix86_varargs_gpr_size)
7711 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7712 t = build2 (MODIFY_EXPR, type, sav, t);
7713 TREE_SIDE_EFFECTS (t) = 1;
7714 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
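/* Putting the stores above together: for a function whose named
   arguments consume one GPR and no SSE registers, va_start expands
   roughly to (a sketch, with ap the va_list):

     ap->gp_offset = 8;                    // 1 GPR * 8 bytes
     ap->fp_offset = 48;                   // 6 GPRs * 8, no XMM used
     ap->overflow_arg_area = <incoming stack args>;
     ap->reg_save_area = <register save area>;
*/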
7718 /* Implement va_arg. */
7721 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7724 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7725 tree f_gpr, f_fpr, f_ovf, f_sav;
7726 tree gpr, fpr, ovf, sav, t;
7728 tree lab_false, lab_over = NULL_TREE;
7733 enum machine_mode nat_mode;
7734 unsigned int arg_boundary;
7736 /* Only the 64-bit target needs something special. */
7737 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7738 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7740 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7741 f_fpr = DECL_CHAIN (f_gpr);
7742 f_ovf = DECL_CHAIN (f_fpr);
7743 f_sav = DECL_CHAIN (f_ovf);
7745 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7746 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7747 valist = build_va_arg_indirect_ref (valist);
7748 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7749 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7750 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7752 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7754 type = build_pointer_type (type);
7755 size = int_size_in_bytes (type);
7756 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7758 nat_mode = type_natural_mode (type, NULL);
7767 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7768 if (!TARGET_64BIT_MS_ABI)
7775 container = construct_container (nat_mode, TYPE_MODE (type),
7776 type, 0, X86_64_REGPARM_MAX,
7777 X86_64_SSE_REGPARM_MAX, intreg,
7782 /* Pull the value out of the saved registers. */
7784 addr = create_tmp_var (ptr_type_node, "addr");
7788 int needed_intregs, needed_sseregs;
7790 tree int_addr, sse_addr;
7792 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7793 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7795 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7797 need_temp = (!REG_P (container)
7798 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7799 || TYPE_ALIGN (type) > 128));
7801 /* In case we are passing a structure, verify that it is a consecutive
7802 block in the register save area. If not, we need to do moves. */
7803 if (!need_temp && !REG_P (container))
7805 /* Verify that all registers are strictly consecutive. */
7806 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7810 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7812 rtx slot = XVECEXP (container, 0, i);
7813 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7814 || INTVAL (XEXP (slot, 1)) != i * 16)
7822 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7824 rtx slot = XVECEXP (container, 0, i);
7825 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7826 || INTVAL (XEXP (slot, 1)) != i * 8)
7838 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7839 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7842 /* First ensure that we fit completely in registers. */
7845 t = build_int_cst (TREE_TYPE (gpr),
7846 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7847 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7848 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7849 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7850 gimplify_and_add (t, pre_p);
7854 t = build_int_cst (TREE_TYPE (fpr),
7855 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7856 + X86_64_REGPARM_MAX * 8);
7857 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7858 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7859 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7860 gimplify_and_add (t, pre_p);
7863 /* Compute index to start of area used for integer regs. */
7866 /* int_addr = gpr + sav; */
7867 t = fold_build_pointer_plus (sav, gpr);
7868 gimplify_assign (int_addr, t, pre_p);
7872 /* sse_addr = fpr + sav; */
7873 t = fold_build_pointer_plus (sav, fpr);
7874 gimplify_assign (sse_addr, t, pre_p);
7878 int i, prev_size = 0;
7879 tree temp = create_tmp_var (type, "va_arg_tmp");
7882 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7883 gimplify_assign (addr, t, pre_p);
7885 for (i = 0; i < XVECLEN (container, 0); i++)
7887 rtx slot = XVECEXP (container, 0, i);
7888 rtx reg = XEXP (slot, 0);
7889 enum machine_mode mode = GET_MODE (reg);
7895 tree dest_addr, dest;
7896 int cur_size = GET_MODE_SIZE (mode);
7898 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7899 prev_size = INTVAL (XEXP (slot, 1));
7900 if (prev_size + cur_size > size)
7902 cur_size = size - prev_size;
7903 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7904 if (mode == BLKmode)
7907 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7908 if (mode == GET_MODE (reg))
7909 addr_type = build_pointer_type (piece_type);
7911 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7913 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7916 if (SSE_REGNO_P (REGNO (reg)))
7918 src_addr = sse_addr;
7919 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7923 src_addr = int_addr;
7924 src_offset = REGNO (reg) * 8;
7926 src_addr = fold_convert (addr_type, src_addr);
7927 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
7929 dest_addr = fold_convert (daddr_type, addr);
7930 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
7931 if (cur_size == GET_MODE_SIZE (mode))
7933 src = build_va_arg_indirect_ref (src_addr);
7934 dest = build_va_arg_indirect_ref (dest_addr);
7936 gimplify_assign (dest, src, pre_p);
7941 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7942 3, dest_addr, src_addr,
7943 size_int (cur_size));
7944 gimplify_and_add (copy, pre_p);
7946 prev_size += cur_size;
7952 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7953 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7954 gimplify_assign (gpr, t, pre_p);
7959 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7960 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7961 gimplify_assign (fpr, t, pre_p);
7964 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7966 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7969 /* ... otherwise out of the overflow area. */
7971 /* When we align a parameter on the stack for the caller, if its
7972 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7973 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We must match the callee
7974 here with the caller. */
7975 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
7976 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7977 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7979 /* Care for on-stack alignment if needed. */
7980 if (arg_boundary <= 64 || size == 0)
7984 HOST_WIDE_INT align = arg_boundary / 8;
7985 t = fold_build_pointer_plus_hwi (ovf, align - 1);
7986 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7987 build_int_cst (TREE_TYPE (t), -align));
7990 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7991 gimplify_assign (addr, t, pre_p);
7993 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
7994 gimplify_assign (unshare_expr (ovf), t, pre_p);
7997 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7999 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8000 addr = fold_convert (ptrtype, addr);
8003 addr = build_va_arg_indirect_ref (addr);
8004 return build_va_arg_indirect_ref (addr);
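/* For va_arg (ap, int) the sequence built above reduces to this
   sketch (illustrative; offsets follow the psABI):

     if (ap->gp_offset >= 48) goto stack;      // 6 GPRs * 8 bytes
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto done;
   stack:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area += 8;
   done:
     result = *(int *) addr;
*/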
8007 /* Return true if OPNUM's MEM should be matched
8008 in movabs* patterns. */
8011 ix86_check_movabs (rtx insn, int opnum)
8015 set = PATTERN (insn);
8016 if (GET_CODE (set) == PARALLEL)
8017 set = XVECEXP (set, 0, 0);
8018 gcc_assert (GET_CODE (set) == SET);
8019 mem = XEXP (set, opnum);
8020 while (GET_CODE (mem) == SUBREG)
8021 mem = SUBREG_REG (mem);
8022 gcc_assert (MEM_P (mem));
8023 return volatile_ok || !MEM_VOLATILE_P (mem);
8026 /* Initialize the table of extra 80387 mathematical constants. */
8029 init_ext_80387_constants (void)
8031 static const char * cst[5] =
8033 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8034 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8035 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8036 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8037 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8041 for (i = 0; i < 5; i++)
8043 real_from_string (&ext_80387_constants_table[i], cst[i]);
8044 /* Ensure each constant is rounded to XFmode precision. */
8045 real_convert (&ext_80387_constants_table[i],
8046 XFmode, &ext_80387_constants_table[i]);
8049 ext_80387_constants_init = 1;
8052 /* Return non-zero if the constant is something that
8053 can be loaded with a special instruction. */
8056 standard_80387_constant_p (rtx x)
8058 enum machine_mode mode = GET_MODE (x);
8062 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8065 if (x == CONST0_RTX (mode))
8067 if (x == CONST1_RTX (mode))
8070 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8072 /* For XFmode constants, try to find a special 80387 instruction when
8073 optimizing for size or on those CPUs that benefit from them. */
8075 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8079 if (! ext_80387_constants_init)
8080 init_ext_80387_constants ();
8082 for (i = 0; i < 5; i++)
8083 if (real_identical (&r, &ext_80387_constants_table[i]))
8087 /* A load of the constant -0.0 or -1.0 will be split into an
8088 fldz;fchs or fld1;fchs sequence. */
8089 if (real_isnegzero (&r))
8091 if (real_identical (&r, &dconstm1))
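/* Summary of the return-value convention of this function (added
   commentary; some of the return statements are elided above): 0 means
   the constant is not special; 1 is 0.0 (fldz); 2 is 1.0 (fld1); 3..7
   are the index into ext_80387_constants_table plus 3 (fldlg2, fldln2,
   fldl2e, fldl2t, fldpi); 8 and 9 are -0.0 and -1.0, loaded as the
   fldz;fchs and fld1;fchs sequences mentioned above.  */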
8097 /* Return the opcode of the special instruction to be used to load the constant X. */
8101 standard_80387_constant_opcode (rtx x)
8103 switch (standard_80387_constant_p (x))
8127 /* Return the CONST_DOUBLE representing the 80387 constant that is
8128 loaded by the specified special instruction. The argument IDX
8129 matches the return value from standard_80387_constant_p. */
8132 standard_80387_constant_rtx (int idx)
8136 if (! ext_80387_constants_init)
8137 init_ext_80387_constants ();
8153 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8157 /* Return 1 if X is all 0s and 2 if X is all 1s
8158 in a supported SSE vector mode. */
8161 standard_sse_constant_p (rtx x)
8163 enum machine_mode mode = GET_MODE (x);
8165 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8167 if (vector_all_ones_operand (x, mode))
8183 /* Return the opcode of the special instruction to be used to load the constant X. */
8187 standard_sse_constant_opcode (rtx insn, rtx x)
8189 switch (standard_sse_constant_p (x))
8192 switch (get_attr_mode (insn))
8195 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8196 return "%vpxor\t%0, %d0";
8198 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8199 return "%vxorpd\t%0, %d0";
8201 return "%vxorps\t%0, %d0";
8204 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8205 return "vpxor\t%x0, %x0, %x0";
8207 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8208 return "vxorpd\t%x0, %x0, %x0";
8210 return "vxorps\t%x0, %x0, %x0";
8217 return "%vpcmpeqd\t%0, %d0";
8224 /* Returns true if OP contains a symbol reference. */
8227 symbolic_reference_mentioned_p (rtx op)
8232 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8235 fmt = GET_RTX_FORMAT (GET_CODE (op));
8236 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8242 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8243 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8247 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8254 /* Return true if it is appropriate to emit `ret' instructions in the
8255 body of a function. Do this only if the epilogue is simple, needing a
8256 couple of insns. Prior to reloading, we can't tell how many registers
8257 must be saved, so return false then. Return false if there is no frame
8258 marker to de-allocate. */
8261 ix86_can_use_return_insn_p (void)
8263 struct ix86_frame frame;
8265 if (! reload_completed || frame_pointer_needed)
8268 /* Don't allow more than 32k pop, since that's all we can do
8269 with one instruction. */
8270 if (crtl->args.pops_args && crtl->args.size >= 32768)
8273 ix86_compute_frame_layout (&frame);
8274 return (frame.stack_pointer_offset == UNITS_PER_WORD
8275 && (frame.nregs + frame.nsseregs) == 0);
8278 /* Value should be nonzero if functions must have frame pointers.
8279 Zero means the frame pointer need not be set up (and parms may
8280 be accessed via the stack pointer) in functions that seem suitable. */
8283 ix86_frame_pointer_required (void)
8285 /* If we accessed previous frames, then the generated code expects
8286 to be able to access the saved ebp value in our frame. */
8287 if (cfun->machine->accesses_prev_frame)
8290 /* Several x86 OSes need a frame pointer for other reasons,
8291 usually pertaining to setjmp. */
8292 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8295 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8296 turns off the frame pointer by default. Turn it back on now if
8297 the function is not a leaf. */
8298 if (TARGET_OMIT_LEAF_FRAME_POINTER
8299 && (!current_function_is_leaf
8300 || ix86_current_function_calls_tls_descriptor))
8303 if (crtl->profile && !flag_fentry)
8309 /* Record that the current function accesses previous call frames. */
8312 ix86_setup_frame_addresses (void)
8314 cfun->machine->accesses_prev_frame = 1;
8317 #ifndef USE_HIDDEN_LINKONCE
8318 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8319 # define USE_HIDDEN_LINKONCE 1
8320 # else
8321 # define USE_HIDDEN_LINKONCE 0
8322 # endif
8323 #endif
8325 static int pic_labels_used;
8327 /* Fills in the label name that should be used for a pc thunk for
8328 the given register. */
8331 get_pc_thunk_name (char name[32], unsigned int regno)
8333 gcc_assert (!TARGET_64BIT);
8335 if (USE_HIDDEN_LINKONCE)
8336 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8338 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8342 /* This function generates code for -fpic that loads %ebx with
8343 the return address of the caller and then returns. */
8346 ix86_code_end (void)
8351 for (regno = AX_REG; regno <= SP_REG; regno++)
8356 if (!(pic_labels_used & (1 << regno)))
8359 get_pc_thunk_name (name, regno);
8361 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8362 get_identifier (name),
8363 build_function_type_list (void_type_node, NULL_TREE));
8364 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8365 NULL_TREE, void_type_node);
8366 TREE_PUBLIC (decl) = 1;
8367 TREE_STATIC (decl) = 1;
8372 switch_to_section (darwin_sections[text_coal_section]);
8373 fputs ("\t.weak_definition\t", asm_out_file);
8374 assemble_name (asm_out_file, name);
8375 fputs ("\n\t.private_extern\t", asm_out_file);
8376 assemble_name (asm_out_file, name);
8377 putc ('\n', asm_out_file);
8378 ASM_OUTPUT_LABEL (asm_out_file, name);
8379 DECL_WEAK (decl) = 1;
8383 if (USE_HIDDEN_LINKONCE)
8385 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8387 targetm.asm_out.unique_section (decl, 0);
8388 switch_to_section (get_named_section (decl, NULL, 0));
8390 targetm.asm_out.globalize_label (asm_out_file, name);
8391 fputs ("\t.hidden\t", asm_out_file);
8392 assemble_name (asm_out_file, name);
8393 putc ('\n', asm_out_file);
8394 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8398 switch_to_section (text_section);
8399 ASM_OUTPUT_LABEL (asm_out_file, name);
8402 DECL_INITIAL (decl) = make_node (BLOCK);
8403 current_function_decl = decl;
8404 init_function_start (decl);
8405 first_function_block_is_cold = false;
8406 /* Make sure unwind info is emitted for the thunk if needed. */
8407 final_start_function (emit_barrier (), asm_out_file, 1);
8409 /* Pad stack IP move with 4 instructions (two NOPs count
8410 as one instruction). */
8411 if (TARGET_PAD_SHORT_FUNCTION)
8416 fputs ("\tnop\n", asm_out_file);
8419 xops[0] = gen_rtx_REG (Pmode, regno);
8420 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8421 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8422 fputs ("\tret\n", asm_out_file);
8423 final_end_function ();
8424 init_insn_lengths ();
8425 free_after_compilation (cfun);
8427 current_function_decl = NULL;
8430 if (flag_split_stack)
8431 file_end_indicate_split_stack ();
8434 /* Emit code for the SET_GOT patterns. */
8437 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8443 if (TARGET_VXWORKS_RTP && flag_pic)
8445 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8446 xops[2] = gen_rtx_MEM (Pmode,
8447 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8448 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8450 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8451 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8452 an unadorned address. */
8453 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8454 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8455 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8459 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8463 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8465 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8468 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8469 is what will be referenced by the Mach-O PIC subsystem. */
8471 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8474 targetm.asm_out.internal_label (asm_out_file, "L",
8475 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8480 get_pc_thunk_name (name, REGNO (dest));
8481 pic_labels_used |= 1 << REGNO (dest);
8483 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8484 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8485 output_asm_insn ("call\t%X2", xops);
8486 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8487 is what will be referenced by the Mach-O PIC subsystem. */
8490 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8492 targetm.asm_out.internal_label (asm_out_file, "L",
8493 CODE_LABEL_NUMBER (label));
8498 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
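/* For illustration (a sketch of typical 32-bit ELF output, not taken
   verbatim from the compiler; the thunk name matches get_pc_thunk_name
   above for %ebx):

       call  __i686.get_pc_thunk.bx         # thunk does: movl (%esp), %ebx; ret
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx   # %ebx now points to the GOT

   after which GOT-relative references such as foo@GOT(%ebx) can be
   used by the rest of the function.  */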
8503 /* Generate an "push" pattern for input ARG. */
8508 struct machine_function *m = cfun->machine;
8510 if (m->fs.cfa_reg == stack_pointer_rtx)
8511 m->fs.cfa_offset += UNITS_PER_WORD;
8512 m->fs.sp_offset += UNITS_PER_WORD;
8514 return gen_rtx_SET (VOIDmode,
8516 gen_rtx_PRE_DEC (Pmode,
8517 stack_pointer_rtx)),
8521 /* Generate an "pop" pattern for input ARG. */
8526 return gen_rtx_SET (VOIDmode,
8529 gen_rtx_POST_INC (Pmode,
8530 stack_pointer_rtx)));
8533 /* Return >= 0 if there is an unused call-clobbered register available
8534 for the entire function. */
8537 ix86_select_alt_pic_regnum (void)
8539 if (current_function_is_leaf
8541 && !ix86_current_function_calls_tls_descriptor)
8544 /* Can't use the same register for both PIC and DRAP. */
8546 drap = REGNO (crtl->drap_reg);
8549 for (i = 2; i >= 0; --i)
8550 if (i != drap && !df_regs_ever_live_p (i))
8554 return INVALID_REGNUM;
8557 /* Return TRUE if we need to save REGNO. */
8560 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8562 if (pic_offset_table_rtx
8563 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8564 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8566 || crtl->calls_eh_return
8567 || crtl->uses_const_pool))
8568 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8570 if (crtl->calls_eh_return && maybe_eh_return)
8575 unsigned test = EH_RETURN_DATA_REGNO (i);
8576 if (test == INVALID_REGNUM)
8583 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8586 return (df_regs_ever_live_p (regno)
8587 && !call_used_regs[regno]
8588 && !fixed_regs[regno]
8589 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8592 /* Return the number of saved general purpose registers. */
8595 ix86_nsaved_regs (void)
8600 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8601 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8606 /* Return the number of saved SSE registers. */
8609 ix86_nsaved_sseregs (void)
8614 if (!TARGET_64BIT_MS_ABI)
8616 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8617 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8622 /* Given FROM and TO register numbers, say whether this elimination is
8623 allowed. If stack alignment is needed, we can only replace argument
8624 pointer with hard frame pointer, or replace frame pointer with stack
8625 pointer. Otherwise, frame pointer elimination is automatically
8626 handled and all other eliminations are valid. */
8629 ix86_can_eliminate (const int from, const int to)
8631 if (stack_realign_fp)
8632 return ((from == ARG_POINTER_REGNUM
8633 && to == HARD_FRAME_POINTER_REGNUM)
8634 || (from == FRAME_POINTER_REGNUM
8635 && to == STACK_POINTER_REGNUM));
8637 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8640 /* Return the offset between two registers, one to be eliminated, and the other
8641 its replacement, at the start of a routine. */
8644 ix86_initial_elimination_offset (int from, int to)
8646 struct ix86_frame frame;
8647 ix86_compute_frame_layout (&frame);
8649 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8650 return frame.hard_frame_pointer_offset;
8651 else if (from == FRAME_POINTER_REGNUM
8652 && to == HARD_FRAME_POINTER_REGNUM)
8653 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8656 gcc_assert (to == STACK_POINTER_REGNUM);
8658 if (from == ARG_POINTER_REGNUM)
8659 return frame.stack_pointer_offset;
8661 gcc_assert (from == FRAME_POINTER_REGNUM);
8662 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8666 /* In a dynamically-aligned function, we can't know the offset from
8667 stack pointer to frame pointer, so we must ensure that setjmp
8668 eliminates fp against the hard fp (%ebp) rather than trying to
8669 index from %esp up to the top of the frame across a gap that is
8670 of unknown (at compile-time) size. */
8672 ix86_builtin_setjmp_frame_value (void)
8674 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8677 /* When using -fsplit-stack, the allocation routines set a field in
8678 the TCB to the bottom of the stack plus this much space, measured in bytes. */
8681 #define SPLIT_STACK_AVAILABLE 256
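/* An illustrative sketch (added note, not from the original sources)
   of how this value is used: the -fsplit-stack prologue compares the
   stack pointer against the TCB field and calls __morestack if too
   little space remains, roughly (64-bit; the TCB slot shown is an
   example and target-specific):

       cmpq  %fs:0x70, %rsp
       jae   1f
       ...                      # set up arguments, call __morestack
   1:

   Because the allocator already adds SPLIT_STACK_AVAILABLE bytes of
   slack to the TCB value, frames smaller than that can compare the
   stack pointer directly instead of first computing SP minus the
   frame size.  */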
8683 /* Fill the ix86_frame structure with information about the frame of the current function. */
8686 ix86_compute_frame_layout (struct ix86_frame *frame)
8688 unsigned int stack_alignment_needed;
8689 HOST_WIDE_INT offset;
8690 unsigned int preferred_alignment;
8691 HOST_WIDE_INT size = get_frame_size ();
8692 HOST_WIDE_INT to_allocate;
8694 frame->nregs = ix86_nsaved_regs ();
8695 frame->nsseregs = ix86_nsaved_sseregs ();
8697 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8698 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8700 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
8701 in function prologues and leaf functions. */
8702 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8703 && (!current_function_is_leaf || cfun->calls_alloca != 0
8704 || ix86_current_function_calls_tls_descriptor))
8706 preferred_alignment = 16;
8707 stack_alignment_needed = 16;
8708 crtl->preferred_stack_boundary = 128;
8709 crtl->stack_alignment_needed = 128;
8712 gcc_assert (!size || stack_alignment_needed);
8713 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8714 gcc_assert (preferred_alignment <= stack_alignment_needed);
8716 /* For SEH we have to limit the amount of code movement into the prologue.
8717 At present we do this via a BLOCKAGE, at which point there's very little
8718 scheduling that can be done, which means that there's very little point
8719 in doing anything except PUSHs. */
8721 cfun->machine->use_fast_prologue_epilogue = false;
8723 /* During reload the number of saved registers can change.
8724 Recompute the value as needed. Do not recompute when the number of
8725 registers didn't change, as reload calls this function multiple times
8726 and does not expect the decision to change within a single iteration. */
8727 else if (!optimize_function_for_size_p (cfun)
8728 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8730 int count = frame->nregs;
8731 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8733 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8735 /* The fast prologue uses move instead of push to save registers. This
8736 is significantly longer, but also executes faster as modern hardware
8737 can execute the moves in parallel, but can't do that for push/pop.
8739 Be careful about choosing which prologue to emit: when the function
8740 takes many instructions to execute, we may use the slow version, as
8741 well as when the function is known to be outside a hot spot (this is
8742 known with feedback only). Weight the size of the function by the
8743 number of registers to save, as it is cheap to use one or two push
8744 instructions but very slow to use many of them. */
8746 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8747 if (node->frequency < NODE_FREQUENCY_NORMAL
8748 || (flag_branch_probabilities
8749 && node->frequency < NODE_FREQUENCY_HOT))
8750 cfun->machine->use_fast_prologue_epilogue = false;
8752 cfun->machine->use_fast_prologue_epilogue
8753 = !expensive_function_p (count);
8756 frame->save_regs_using_mov
8757 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8758 /* If static stack checking is enabled and done with probes,
8759 the registers need to be saved before allocating the frame. */
8760 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8762 /* Skip return address. */
8763 offset = UNITS_PER_WORD;
8765 /* Skip pushed static chain. */
8766 if (ix86_static_chain_on_stack)
8767 offset += UNITS_PER_WORD;
8769 /* Skip saved base pointer. */
8770 if (frame_pointer_needed)
8771 offset += UNITS_PER_WORD;
8772 frame->hfp_save_offset = offset;
8774 /* The traditional frame pointer location is at the top of the frame. */
8775 frame->hard_frame_pointer_offset = offset;
8777 /* Register save area */
8778 offset += frame->nregs * UNITS_PER_WORD;
8779 frame->reg_save_offset = offset;
8781 /* Align and set SSE register save area. */
8782 if (frame->nsseregs)
8784 /* The only ABI that has saved SSE registers (Win64) also has a
8785 16-byte aligned default stack, and thus we don't need to be
8786 within the re-aligned local stack frame to save them. */
8787 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8788 offset = (offset + 16 - 1) & -16;
8789 offset += frame->nsseregs * 16;
8791 frame->sse_reg_save_offset = offset;
8793 /* The re-aligned stack starts here. Values before this point are not
8794 directly comparable with values below this point. In order to make
8795 sure that no value happens to be the same before and after, force
8796 the alignment computation below to add a non-zero value. */
8797 if (stack_realign_fp)
8798 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
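/* A worked example of the statement above (added note): unlike the
   usual (offset + align - 1) & -align round-up, omitting the "- 1"
   guarantees the offset strictly increases.  With
   stack_alignment_needed == 16, offset 40 becomes (40 + 16) & -16 == 48,
   and an already aligned offset 32 also becomes (32 + 16) & -16 == 48,
   so no offset below the realignment point can equal one above it.  */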
8801 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8802 offset += frame->va_arg_size;
8804 /* Align the start of the frame for the local function. */
8805 if (stack_realign_fp
8806 || offset != frame->sse_reg_save_offset
8808 || !current_function_is_leaf
8809 || cfun->calls_alloca
8810 || ix86_current_function_calls_tls_descriptor)
8811 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8813 /* Frame pointer points here. */
8814 frame->frame_pointer_offset = offset;
8818 /* Add the outgoing arguments area. It can be skipped if we eliminated
8819 all the function calls as dead code.
8820 Skipping is however impossible when the function calls alloca: the
8821 alloca expander assumes that the last crtl->outgoing_args_size bytes
8822 of the stack frame are unused. */
8823 if (ACCUMULATE_OUTGOING_ARGS
8824 && (!current_function_is_leaf || cfun->calls_alloca
8825 || ix86_current_function_calls_tls_descriptor))
8827 offset += crtl->outgoing_args_size;
8828 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8831 frame->outgoing_arguments_size = 0;
8833 /* Align the stack boundary. Only needed if we're calling another function or using alloca. */
8835 if (!current_function_is_leaf || cfun->calls_alloca
8836 || ix86_current_function_calls_tls_descriptor)
8837 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8839 /* We've reached the end of the stack frame. */
8840 frame->stack_pointer_offset = offset;
8842 /* Size the prologue needs to allocate. */
8843 to_allocate = offset - frame->sse_reg_save_offset;
8845 if ((!to_allocate && frame->nregs <= 1)
8846 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8847 frame->save_regs_using_mov = false;
8849 if (ix86_using_red_zone ()
8850 && current_function_sp_is_unchanging
8851 && current_function_is_leaf
8852 && !ix86_current_function_calls_tls_descriptor)
8854 frame->red_zone_size = to_allocate;
8855 if (frame->save_regs_using_mov)
8856 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8857 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8858 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
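/* A worked example (added note; RED_ZONE_SIZE and RED_ZONE_RESERVE are
   defined elsewhere in the port -- the x86-64 ABI red zone is 128
   bytes, and the reserve is assumed here to be 16): with
   to_allocate == 40 and two registers saved via mov in 64-bit mode,
   red_zone_size = 40 + 2 * 8 = 56 <= 128 - 16, so the whole frame fits
   in the red zone and stack_pointer_offset is reduced by 56 below,
   avoiding an explicit stack adjustment.  */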
8861 frame->red_zone_size = 0;
8862 frame->stack_pointer_offset -= frame->red_zone_size;
8864 /* The SEH frame pointer location is near the bottom of the frame.
8865 This is enforced by the fact that the difference between the
8866 stack pointer and the frame pointer is limited to 240 bytes in
8867 the unwind data structure. */
8872 /* If we can leave the frame pointer where it is, do so. */
8873 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
8874 if (diff > 240 || (diff & 15) != 0)
8876 /* Ideally we'd determine what portion of the local stack frame
8877 (within the constraint of the lowest 240) is most heavily used.
8878 But without that complication, simply bias the frame pointer
8879 by 128 bytes so as to maximize the amount of the local stack
8880 frame that is addressable with 8-bit offsets. */
8881 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
8886 /* This is semi-inlined memory_address_length, but simplified
8887 since we know that we're always dealing with reg+offset, and
8888 to avoid having to create and discard all that rtl. */
8891 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8897 /* EBP and R13 cannot be encoded without an offset. */
8898 len = (regno == BP_REG || regno == R13_REG);
8900 else if (IN_RANGE (offset, -128, 127))
8903 /* ESP and R12 must be encoded with a SIB byte. */
8904 if (regno == SP_REG || regno == R12_REG)
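/* Illustrative encodings (added note): the count here is the number of
   extra address bytes for a reg+offset operand.  E.g. (%rbx) needs
   neither a displacement nor a SIB byte -> 0; (%rbp) must be encoded
   as 0(%rbp) with a disp8 -> 1; 16(%rsp) needs a disp8 plus a SIB
   byte -> 2; 512(%rbx) needs a disp32 -> 4.  */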
8910 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8911 The valid base registers are taken from CFUN->MACHINE->FS. */
8914 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8916 const struct machine_function *m = cfun->machine;
8917 rtx base_reg = NULL;
8918 HOST_WIDE_INT base_offset = 0;
8920 if (m->use_fast_prologue_epilogue)
8922 /* Choose the base register most likely to allow the most scheduling
8923 opportunities. Generally FP is valid throughout the function,
8924 while DRAP must be reloaded within the epilogue. But choose either
8925 over the SP due to increased encoding size. */
8929 base_reg = hard_frame_pointer_rtx;
8930 base_offset = m->fs.fp_offset - cfa_offset;
8932 else if (m->fs.drap_valid)
8934 base_reg = crtl->drap_reg;
8935 base_offset = 0 - cfa_offset;
8937 else if (m->fs.sp_valid)
8939 base_reg = stack_pointer_rtx;
8940 base_offset = m->fs.sp_offset - cfa_offset;
8945 HOST_WIDE_INT toffset;
8948 /* Choose the base register with the smallest address encoding.
8949 With a tie, choose FP > DRAP > SP. */
8952 base_reg = stack_pointer_rtx;
8953 base_offset = m->fs.sp_offset - cfa_offset;
8954 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8956 if (m->fs.drap_valid)
8958 toffset = 0 - cfa_offset;
8959 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8962 base_reg = crtl->drap_reg;
8963 base_offset = toffset;
8969 toffset = m->fs.fp_offset - cfa_offset;
8970 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8973 base_reg = hard_frame_pointer_rtx;
8974 base_offset = toffset;
8979 gcc_assert (base_reg != NULL);
8981 return plus_constant (base_reg, base_offset);
8984 /* Emit code to save registers in the prologue. */
8987 ix86_emit_save_regs (void)
8992 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8993 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8995 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8996 RTX_FRAME_RELATED_P (insn) = 1;
9000 /* Emit a single register save at CFA - CFA_OFFSET. */
9003 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9004 HOST_WIDE_INT cfa_offset)
9006 struct machine_function *m = cfun->machine;
9007 rtx reg = gen_rtx_REG (mode, regno);
9008 rtx mem, addr, base, insn;
9010 addr = choose_baseaddr (cfa_offset);
9011 mem = gen_frame_mem (mode, addr);
9013 /* For SSE saves, we need to indicate the 128-bit alignment. */
9014 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9016 insn = emit_move_insn (mem, reg);
9017 RTX_FRAME_RELATED_P (insn) = 1;
9020 if (GET_CODE (base) == PLUS)
9021 base = XEXP (base, 0);
9022 gcc_checking_assert (REG_P (base));
9024 /* When saving registers into a re-aligned local stack frame, avoid
9025 any tricky guessing by dwarf2out. */
9026 if (m->fs.realigned)
9028 gcc_checking_assert (stack_realign_drap);
9030 if (regno == REGNO (crtl->drap_reg))
9032 /* A bit of a hack. We force the DRAP register to be saved in
9033 the re-aligned stack frame, which provides us with a copy
9034 of the CFA that will last past the prologue. Install it. */
9035 gcc_checking_assert (cfun->machine->fs.fp_valid);
9036 addr = plus_constant (hard_frame_pointer_rtx,
9037 cfun->machine->fs.fp_offset - cfa_offset);
9038 mem = gen_rtx_MEM (mode, addr);
9039 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9043 /* The frame pointer is a stable reference within the
9044 aligned frame. Use it. */
9045 gcc_checking_assert (cfun->machine->fs.fp_valid);
9046 addr = plus_constant (hard_frame_pointer_rtx,
9047 cfun->machine->fs.fp_offset - cfa_offset);
9048 mem = gen_rtx_MEM (mode, addr);
9049 add_reg_note (insn, REG_CFA_EXPRESSION,
9050 gen_rtx_SET (VOIDmode, mem, reg));
9054 /* The memory may not be relative to the current CFA register,
9055 which means that we may need to generate a new pattern for
9056 use by the unwind info. */
9057 else if (base != m->fs.cfa_reg)
9059 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9060 mem = gen_rtx_MEM (mode, addr);
9061 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9065 /* Emit code to save registers using MOV insns.
9066 First register is stored at CFA - CFA_OFFSET. */
9068 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9072 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9073 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9075 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9076 cfa_offset -= UNITS_PER_WORD;
9080 /* Emit code to save SSE registers using MOV insns.
9081 First register is stored at CFA - CFA_OFFSET. */
9083 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9087 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9088 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9090 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9095 static GTY(()) rtx queued_cfa_restores;
9097 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9098 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9099 Don't add the note if the previously saved value will be left untouched
9100 within the stack red zone until return, as unwinders can find the same
9101 value in the register and on the stack. */
9104 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9106 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9111 add_reg_note (insn, REG_CFA_RESTORE, reg);
9112 RTX_FRAME_RELATED_P (insn) = 1;
9116 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9119 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9122 ix86_add_queued_cfa_restore_notes (rtx insn)
9125 if (!queued_cfa_restores)
9127 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9129 XEXP (last, 1) = REG_NOTES (insn);
9130 REG_NOTES (insn) = queued_cfa_restores;
9131 queued_cfa_restores = NULL_RTX;
9132 RTX_FRAME_RELATED_P (insn) = 1;
9135 /* Expand prologue or epilogue stack adjustment.
9136 The pattern exists to put a dependency on all ebp-based memory accesses.
9137 STYLE should be negative if instructions should be marked as frame related,
9138 zero if the %r11 register is live and cannot be freely used, and positive
9139 otherwise. */
9142 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9143 int style, bool set_cfa)
9145 struct machine_function *m = cfun->machine;
9147 bool add_frame_related_expr = false;
9150 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9151 else if (x86_64_immediate_operand (offset, DImode))
9152 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9156 /* r11 is used by indirect sibcall return as well, set before the
9157 epilogue and used after the epilogue. */
9159 tmp = gen_rtx_REG (DImode, R11_REG);
9162 gcc_assert (src != hard_frame_pointer_rtx
9163 && dest != hard_frame_pointer_rtx);
9164 tmp = hard_frame_pointer_rtx;
9166 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9168 add_frame_related_expr = true;
9170 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9173 insn = emit_insn (insn);
9175 ix86_add_queued_cfa_restore_notes (insn);
9181 gcc_assert (m->fs.cfa_reg == src);
9182 m->fs.cfa_offset += INTVAL (offset);
9183 m->fs.cfa_reg = dest;
9185 r = gen_rtx_PLUS (Pmode, src, offset);
9186 r = gen_rtx_SET (VOIDmode, dest, r);
9187 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9188 RTX_FRAME_RELATED_P (insn) = 1;
9192 RTX_FRAME_RELATED_P (insn) = 1;
9193 if (add_frame_related_expr)
9195 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9196 r = gen_rtx_SET (VOIDmode, dest, r);
9197 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9201 if (dest == stack_pointer_rtx)
9203 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9204 bool valid = m->fs.sp_valid;
9206 if (src == hard_frame_pointer_rtx)
9208 valid = m->fs.fp_valid;
9209 ooffset = m->fs.fp_offset;
9211 else if (src == crtl->drap_reg)
9213 valid = m->fs.drap_valid;
9218 /* Else there are two possibilities: SP itself, which we set
9219 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9220 taken care of by hand along the eh_return path. */
9221 gcc_checking_assert (src == stack_pointer_rtx
9222 || offset == const0_rtx);
9225 m->fs.sp_offset = ooffset - INTVAL (offset);
9226 m->fs.sp_valid = valid;
9230 /* Find an available register to be used as a dynamic realign argument
9231 pointer register. Such a register will be written in the prologue and
9232 used at the beginning of the body, so it must not be
9233 1. a parameter passing register.
9234 2. the GOT pointer.
9235 We reuse the static-chain register if it is available. Otherwise, we
9236 use DI for i386 and R13 for x86-64. We chose R13 since it has a
9237 longer encoding.
9239 Return: the regno of the chosen register. */
9242 find_drap_reg (void)
9244 tree decl = cfun->decl;
9248 /* Use R13 for a nested function or a function that needs a static
9249 chain. Since a function with a tail call may use any caller-saved
9250 register in the epilogue, DRAP must not use a caller-saved
9251 register in that case. */
9252 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9259 /* Use DI for a nested function or a function that needs a static
9260 chain. Since a function with a tail call may use any caller-saved
9261 register in the epilogue, DRAP must not use a caller-saved
9262 register in that case. */
9263 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9266 /* Reuse the static chain register if it isn't used for parameter passing. */
9268 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9270 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9271 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9278 /* Return minimum incoming stack alignment. */
9281 ix86_minimum_incoming_stack_boundary (bool sibcall)
9283 unsigned int incoming_stack_boundary;
9285 /* Prefer the one specified at command line. */
9286 if (ix86_user_incoming_stack_boundary)
9287 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9288 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9289 if -mstackrealign is used, this isn't a sibcall check, and the
9290 estimated stack alignment is 128 bits. */
9293 && ix86_force_align_arg_pointer
9294 && crtl->stack_alignment_estimated == 128)
9295 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9297 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9299 /* Incoming stack alignment can be changed on individual functions
9300 via force_align_arg_pointer attribute. We use the smallest
9301 incoming stack boundary. */
9302 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9303 && lookup_attribute (ix86_force_align_arg_pointer_string,
9304 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9305 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9307 /* The incoming stack frame has to be aligned at least at
9308 parm_stack_boundary. */
9309 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9310 incoming_stack_boundary = crtl->parm_stack_boundary;
9312 /* The stack at the entry of main is aligned by the runtime. We use the
9313 smallest incoming stack boundary. */
9314 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9315 && DECL_NAME (current_function_decl)
9316 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9317 && DECL_FILE_SCOPE_P (current_function_decl))
9318 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9320 return incoming_stack_boundary;
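/* To summarize the precedence implemented above (added note): a
   boundary given on the command line wins; otherwise the target
   default applies, lowered to MIN_STACK_BOUNDARY for 32-bit
   -mstackrealign outside sibcall checks.  The force_align_arg_pointer
   attribute can then lower the result to MIN_STACK_BOUNDARY,
   parm_stack_boundary can raise it, and main is lowered to
   MAIN_STACK_BOUNDARY because the runtime aligns its stack.  */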
9323 /* Update incoming stack boundary and estimated stack alignment. */
9326 ix86_update_stack_boundary (void)
9328 ix86_incoming_stack_boundary
9329 = ix86_minimum_incoming_stack_boundary (false);
9331 /* x86_64 varargs need 16-byte stack alignment for the register save area. */
9335 && crtl->stack_alignment_estimated < 128)
9336 crtl->stack_alignment_estimated = 128;
9339 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9340 needed or an rtx for DRAP otherwise. */
9343 ix86_get_drap_rtx (void)
9345 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9346 crtl->need_drap = true;
9348 if (stack_realign_drap)
9350 /* Assign DRAP to vDRAP and return vDRAP. */
9351 unsigned int regno = find_drap_reg ();
9356 arg_ptr = gen_rtx_REG (Pmode, regno);
9357 crtl->drap_reg = arg_ptr;
9360 drap_vreg = copy_to_reg (arg_ptr);
9364 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9367 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9368 RTX_FRAME_RELATED_P (insn) = 1;
9376 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9379 ix86_internal_arg_pointer (void)
9381 return virtual_incoming_args_rtx;
9384 struct scratch_reg {
9389 /* Return a short-lived scratch register for use on function entry.
9390 In 32-bit mode, it is valid only after the registers are saved
9391 in the prologue. This register must be released by means of
9392 release_scratch_register_on_entry once it is dead. */
9395 get_scratch_register_on_entry (struct scratch_reg *sr)
9403 /* We always use R11 in 64-bit mode. */
9408 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9410 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9411 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9412 int regparm = ix86_function_regparm (fntype, decl);
9414 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9416 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9417 for the static chain register. */
9418 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9419 && drap_regno != AX_REG)
9421 else if (regparm < 2 && drap_regno != DX_REG)
9423 /* ecx is the static chain register. */
9424 else if (regparm < 3 && !fastcall_p && !static_chain_p
9425 && drap_regno != CX_REG)
9427 else if (ix86_save_reg (BX_REG, true))
9429 /* esi is the static chain register. */
9430 else if (!(regparm == 3 && static_chain_p)
9431 && ix86_save_reg (SI_REG, true))
9433 else if (ix86_save_reg (DI_REG, true))
9437 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9442 sr->reg = gen_rtx_REG (Pmode, regno);
9445 rtx insn = emit_insn (gen_push (sr->reg));
9446 RTX_FRAME_RELATED_P (insn) = 1;
9450 /* Release a scratch register obtained from the preceding function. */
9453 release_scratch_register_on_entry (struct scratch_reg *sr)
9457 rtx x, insn = emit_insn (gen_pop (sr->reg));
9459 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9460 RTX_FRAME_RELATED_P (insn) = 1;
9461 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9462 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9463 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9467 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9469 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9472 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9474 /* We skip the probe for the first interval + a small dope of 4 words and
9475 probe that many bytes past the specified size to maintain a protection
9476 area at the bottom of the stack. */
9477 const int dope = 4 * UNITS_PER_WORD;
9478 rtx size_rtx = GEN_INT (size), last;
9480 /* See if we have a constant small number of probes to generate. If so,
9481 that's the easy case. The run-time loop is made up of 11 insns in the
9482 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9483 for n # of intervals. */
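/* A worked example (added note; assuming the common PROBE_INTERVAL of
   4096 bytes): for size == 3 * 4096 the unrolled form costs
   3 + 2 * (3 - 1) = 7 insns, cheaper than the 11-insn run-time loop.
   At 5 intervals the two break even (3 + 2 * 4 = 11), which is why
   the test below unrolls only up to 5 * PROBE_INTERVAL.  */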
9484 if (size <= 5 * PROBE_INTERVAL)
9486 HOST_WIDE_INT i, adjust;
9487 bool first_probe = true;
9489 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9490 values of N from 1 until it exceeds SIZE. If only one probe is
9491 needed, this will not generate any code. Then adjust and probe
9492 to PROBE_INTERVAL + SIZE. */
9493 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9497 adjust = 2 * PROBE_INTERVAL + dope;
9498 first_probe = false;
9501 adjust = PROBE_INTERVAL;
9503 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9504 plus_constant (stack_pointer_rtx, -adjust)));
9505 emit_stack_probe (stack_pointer_rtx);
9509 adjust = size + PROBE_INTERVAL + dope;
9511 adjust = size + PROBE_INTERVAL - i;
9513 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9514 plus_constant (stack_pointer_rtx, -adjust)));
9515 emit_stack_probe (stack_pointer_rtx);
9517 /* Adjust back to account for the additional first interval. */
9518 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9519 plus_constant (stack_pointer_rtx,
9520 PROBE_INTERVAL + dope)));
9523 /* Otherwise, do the same as above, but in a loop. Note that we must be
9524 extra careful with variables wrapping around because we might be at
9525 the very top (or the very bottom) of the address space and we have
9526 to be able to handle this case properly; in particular, we use an
9527 equality test for the loop condition. */
9530 HOST_WIDE_INT rounded_size;
9531 struct scratch_reg sr;
9533 get_scratch_register_on_entry (&sr);
9536 /* Step 1: round SIZE to the previous multiple of the interval. */
9538 rounded_size = size & -PROBE_INTERVAL;
9541 /* Step 2: compute initial and final value of the loop counter. */
9543 /* SP = SP_0 + PROBE_INTERVAL. */
9544 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9545 plus_constant (stack_pointer_rtx,
9546 - (PROBE_INTERVAL + dope))));
9548 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9549 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9550 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9551 gen_rtx_PLUS (Pmode, sr.reg,
9552 stack_pointer_rtx)));
9556 /* Step 3: the loop
9557 while (SP != LAST_ADDR)
9558 {
9559 SP = SP + PROBE_INTERVAL
9560 probe at SP
9561 }
9563 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9564 values of N from 1 until it is equal to ROUNDED_SIZE. */
9566 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9569 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9570 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9572 if (size != rounded_size)
9574 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9575 plus_constant (stack_pointer_rtx,
9576 rounded_size - size)));
9577 emit_stack_probe (stack_pointer_rtx);
9580 /* Adjust back to account for the additional first interval. */
9581 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9582 plus_constant (stack_pointer_rtx,
9583 PROBE_INTERVAL + dope)));
9585 release_scratch_register_on_entry (&sr);
9588 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9590 /* Even if the stack pointer isn't the CFA register, we need to correctly
9591 describe the adjustments made to it, in particular differentiate the
9592 frame-related ones from the frame-unrelated ones. */
9595 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9596 XVECEXP (expr, 0, 0)
9597 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9598 plus_constant (stack_pointer_rtx, -size));
9599 XVECEXP (expr, 0, 1)
9600 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9601 plus_constant (stack_pointer_rtx,
9602 PROBE_INTERVAL + dope + size));
9603 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9604 RTX_FRAME_RELATED_P (last) = 1;
9606 cfun->machine->fs.sp_offset += size;
9609 /* Make sure nothing is scheduled before we are done. */
9610 emit_insn (gen_blockage ());
9613 /* Adjust the stack pointer up to REG while probing it. */
9616 output_adjust_stack_and_probe (rtx reg)
9618 static int labelno = 0;
9619 char loop_lab[32], end_lab[32];
9622 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9623 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9625 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9627 /* Jump to END_LAB if SP == LAST_ADDR. */
9628 xops[0] = stack_pointer_rtx;
9630 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9631 fputs ("\tje\t", asm_out_file);
9632 assemble_name_raw (asm_out_file, end_lab);
9633 fputc ('\n', asm_out_file);
9635 /* SP = SP + PROBE_INTERVAL. */
9636 xops[1] = GEN_INT (PROBE_INTERVAL);
9637 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9640 xops[1] = const0_rtx;
9641 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9643 fprintf (asm_out_file, "\tjmp\t");
9644 assemble_name_raw (asm_out_file, loop_lab);
9645 fputc ('\n', asm_out_file);
9647 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9652 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9653 inclusive. These are offsets from the current stack pointer. */
9656 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9658 /* See if we have a constant small number of probes to generate. If so,
9659 that's the easy case. The run-time loop is made up of 7 insns in the
9660 generic case while the compile-time loop is made up of n insns for n # of intervals. */
9662 if (size <= 7 * PROBE_INTERVAL)
9666 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9667 it exceeds SIZE. If only one probe is needed, this will not
9668 generate any code. Then probe at FIRST + SIZE. */
9669 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9670 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9672 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9675 /* Otherwise, do the same as above, but in a loop. Note that we must be
9676 extra careful with variables wrapping around because we might be at
9677 the very top (or the very bottom) of the address space and we have
9678 to be able to handle this case properly; in particular, we use an
9679 equality test for the loop condition. */
9682 HOST_WIDE_INT rounded_size, last;
9683 struct scratch_reg sr;
9685 get_scratch_register_on_entry (&sr);
9688 /* Step 1: round SIZE to the previous multiple of the interval. */
9690 rounded_size = size & -PROBE_INTERVAL;
9693 /* Step 2: compute initial and final value of the loop counter. */
9695 /* TEST_OFFSET = FIRST. */
9696 emit_move_insn (sr.reg, GEN_INT (-first));
9698 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9699 last = first + rounded_size;
9703 /* Step 3: the loop
9704 while (TEST_ADDR != LAST_ADDR)
9705 {
9706 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9707 probe at TEST_ADDR
9708 }
9710 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9711 until it is equal to ROUNDED_SIZE. */
9713 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9716 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9717 that SIZE is equal to ROUNDED_SIZE. */
9719 if (size != rounded_size)
9720 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9723 rounded_size - size));
9725 release_scratch_register_on_entry (&sr);
9728 /* Make sure nothing is scheduled before we are done. */
9729 emit_insn (gen_blockage ());
9732 /* Probe a range of stack addresses from REG to END, inclusive. These are
9733 offsets from the current stack pointer. */
9736 output_probe_stack_range (rtx reg, rtx end)
9738 static int labelno = 0;
9739 char loop_lab[32], end_lab[32];
9742 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9743 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9745 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9747 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9750 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9751 fputs ("\tje\t", asm_out_file);
9752 assemble_name_raw (asm_out_file, end_lab);
9753 fputc ('\n', asm_out_file);
9755 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9756 xops[1] = GEN_INT (PROBE_INTERVAL);
9757 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9759 /* Probe at TEST_ADDR. */
9760 xops[0] = stack_pointer_rtx;
9762 xops[2] = const0_rtx;
9763 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9765 fprintf (asm_out_file, "\tjmp\t");
9766 assemble_name_raw (asm_out_file, loop_lab);
9767 fputc ('\n', asm_out_file);
9769 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
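/* For illustration (a sketch, not verbatim compiler output; assumes
   64-bit code, PROBE_INTERVAL == 4096, and a scratch register %r11
   holding the negated test offset), the loop printed above looks like:

   .LPSRL0:
       cmpq   %rax, %r11         # while (TEST_ADDR != LAST_ADDR)
       je     .LPSRE0
       subq   $4096, %r11        #   advance by one interval (offsets
                                 #   are negative, hence the sub)
       orq    $0, (%rsp,%r11)    #   probe the word at SP + TEST_ADDR
       jmp    .LPSRL0
   .LPSRE0:

   The "or $0" is a cheap read-modify-write touch that faults if the
   page is not yet mapped, which is exactly what a probe is for.  */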
9774 /* Finalize the stack_realign_needed flag, which guides the prologue/epilogue
9775 to be generated in the correct form. */
9777 ix86_finalize_stack_realign_flags (void)
9779 /* Check if stack realignment is really needed after reload, and
9780 store the result in cfun. */
9781 unsigned int incoming_stack_boundary
9782 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9783 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9784 unsigned int stack_realign = (incoming_stack_boundary
9785 < (current_function_is_leaf
9786 ? crtl->max_used_stack_slot_alignment
9787 : crtl->stack_alignment_needed));
9789 if (crtl->stack_realign_finalized)
9791 /* After stack_realign_needed is finalized, we can no longer
9792 change it. */
9793 gcc_assert (crtl->stack_realign_needed == stack_realign);
9797 crtl->stack_realign_needed = stack_realign;
9798 crtl->stack_realign_finalized = true;
9802 /* Expand the prologue into a bunch of separate insns. */
9805 ix86_expand_prologue (void)
9807 struct machine_function *m = cfun->machine;
9810 struct ix86_frame frame;
9811 HOST_WIDE_INT allocate;
9812 bool int_registers_saved;
9814 ix86_finalize_stack_realign_flags ();
9816 /* DRAP should not coexist with stack_realign_fp. */
9817 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9819 memset (&m->fs, 0, sizeof (m->fs));
9821 /* Initialize CFA state for before the prologue. */
9822 m->fs.cfa_reg = stack_pointer_rtx;
9823 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9825 /* Track SP offset to the CFA. We continue tracking this after we've
9826 swapped the CFA register away from SP. In the case of re-alignment
9827 this is fudged; we're interested in offsets within the local frame. */
9828 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9829 m->fs.sp_valid = true;
9831 ix86_compute_frame_layout (&frame);
9833 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9835 /* We should have already generated an error for any use of
9836 ms_hook on a nested function. */
9837 gcc_checking_assert (!ix86_static_chain_on_stack);
9839 /* Check if profiling is active and whether we shall use the
9840 profiling-before-prologue variant. If so, sorry. */
9841 if (crtl->profile && flag_fentry != 0)
9842 sorry ("ms_hook_prologue attribute isn%'t compatible "
9843 "with -mfentry for 32-bit");
9845 /* In ix86_asm_output_function_label we emitted:
9846 8b ff movl.s %edi,%edi
9847 55 push %ebp
9848 8b ec movl.s %esp,%ebp
9850 This matches the hookable function prologue in Win32 API
9851 functions in Microsoft Windows XP Service Pack 2 and newer.
9852 Wine uses this to enable Windows apps to hook the Win32 API
9853 functions provided by Wine.
9855 What that means is that we've already set up the frame pointer. */
9857 if (frame_pointer_needed
9858 && !(crtl->drap_reg && crtl->stack_realign_needed))
9862 /* We've decided to use the frame pointer already set up.
9863 Describe this to the unwinder by pretending that both
9864 push and mov insns happen right here.
9866 Putting the unwind info here at the end of the ms_hook
9867 is done so that we can make absolutely certain we get
9868 the required byte sequence at the start of the function,
9869 rather than relying on an assembler that can produce
9870 the exact encoding required.
9872 However, it does mean (in the unpatched case) that we have
9873 a 1-insn window where the asynchronous unwind info is
9874 incorrect. On the other hand, if we placed the unwind info at
9875 its correct location we would have incorrect unwind info
9876 in the patched case. This is probably all moot, since
9877 I don't expect Wine generates dwarf2 unwind info for the
9878 system libraries that use this feature. */
9880 insn = emit_insn (gen_blockage ());
9882 push = gen_push (hard_frame_pointer_rtx);
9883 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9885 RTX_FRAME_RELATED_P (push) = 1;
9886 RTX_FRAME_RELATED_P (mov) = 1;
9888 RTX_FRAME_RELATED_P (insn) = 1;
9889 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9890 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9892 /* Note that gen_push incremented m->fs.cfa_offset, even
9893 though we didn't emit the push insn here. */
9894 m->fs.cfa_reg = hard_frame_pointer_rtx;
9895 m->fs.fp_offset = m->fs.cfa_offset;
9896 m->fs.fp_valid = true;
9900 /* The frame pointer is not needed so pop %ebp again.
9901 This leaves us with a pristine state. */
9902 emit_insn (gen_pop (hard_frame_pointer_rtx));
9906 /* The first insn of a function that accepts its static chain on the
9907 stack is to push the register that would be filled in by a direct
9908 call. This insn will be skipped by the trampoline. */
9909 else if (ix86_static_chain_on_stack)
9911 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9912 emit_insn (gen_blockage ());
9914 /* We don't want to interpret this push insn as a register save,
9915 only as a stack adjustment. The real copy of the register as
9916 a save will be done later, if needed. */
9917 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9918 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9919 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9920 RTX_FRAME_RELATED_P (insn) = 1;
9923 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9924 DRAP is needed and stack realignment is really needed after reload. */
9925 if (stack_realign_drap)
9927 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9929 /* Only need to push the parameter pointer reg if it is caller-saved. */
9930 if (!call_used_regs[REGNO (crtl->drap_reg)])
9932 /* Push the arg pointer reg. */
9933 insn = emit_insn (gen_push (crtl->drap_reg));
9934 RTX_FRAME_RELATED_P (insn) = 1;
9937 /* Grab the argument pointer. */
9938 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9939 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9940 RTX_FRAME_RELATED_P (insn) = 1;
9941 m->fs.cfa_reg = crtl->drap_reg;
9942 m->fs.cfa_offset = 0;
9944 /* Align the stack. */
9945 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9947 GEN_INT (-align_bytes)));
9948 RTX_FRAME_RELATED_P (insn) = 1;
9950 /* Replicate the return address on the stack so that the return
9951 address can be reached via the (argp - 1) slot. This is needed
9952 to implement the macro RETURN_ADDR_RTX and the intrinsic function
9953 expand_builtin_return_addr, etc. */
9954 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9955 t = gen_frame_mem (Pmode, t);
9956 insn = emit_insn (gen_push (t));
9957 RTX_FRAME_RELATED_P (insn) = 1;
9959 /* For the purposes of frame and register save area addressing,
9960 we've started over with a new frame. */
9961 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9962 m->fs.realigned = true;
9965 if (frame_pointer_needed && !m->fs.fp_valid)
9967 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9968 slower on all targets. Also sdb doesn't like it. */
9969 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9970 RTX_FRAME_RELATED_P (insn) = 1;
9972 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
9974 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9975 RTX_FRAME_RELATED_P (insn) = 1;
9977 if (m->fs.cfa_reg == stack_pointer_rtx)
9978 m->fs.cfa_reg = hard_frame_pointer_rtx;
9979 m->fs.fp_offset = m->fs.sp_offset;
9980 m->fs.fp_valid = true;
9984 int_registers_saved = (frame.nregs == 0);
9986 if (!int_registers_saved)
9988 /* If saving registers via PUSH, do so now. */
9989 if (!frame.save_regs_using_mov)
9991 ix86_emit_save_regs ();
9992 int_registers_saved = true;
9993 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9996 /* When using the red zone we may start saving registers before allocating
9997 the stack frame, saving one cycle of the prologue. However, avoid
9998 doing this if we have to probe the stack; at least on x86_64 the
9999 stack probe can turn into a call that clobbers a red zone location. */
10000 else if (ix86_using_red_zone ()
10001 && (! TARGET_STACK_PROBE
10002 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10004 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10005 int_registers_saved = true;
10009 if (stack_realign_fp)
10011 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10012 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10014 /* The computation of the size of the re-aligned stack frame means
10015 that we must allocate the size of the register save area before
10016 performing the actual alignment. Otherwise we cannot guarantee
10017 that there's enough storage above the realignment point. */
10018 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10019 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10020 GEN_INT (m->fs.sp_offset
10021 - frame.sse_reg_save_offset),
10024 /* Align the stack. */
10025 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10027 GEN_INT (-align_bytes)));
10029 /* For the purposes of register save area addressing, the stack
10030 pointer is no longer valid. As for the value of sp_offset,
10031 see ix86_compute_frame_layout, which we need to match in order
10032 to pass verification of stack_pointer_offset at the end. */
10033 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10034 m->fs.sp_valid = false;
10037 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10039 if (flag_stack_usage_info)
10041 /* We start to count from ARG_POINTER. */
10042 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10044 /* If it was realigned, take into account the fake frame. */
10045 if (stack_realign_drap)
10047 if (ix86_static_chain_on_stack)
10048 stack_size += UNITS_PER_WORD;
10050 if (!call_used_regs[REGNO (crtl->drap_reg)])
10051 stack_size += UNITS_PER_WORD;
10053 /* This over-estimates by 1 minimal-stack-alignment-unit but
10054 mitigates that by counting in the new return address slot. */
10055 current_function_dynamic_stack_size
10056 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10059 current_function_static_stack_size = stack_size;
10062 /* The stack has already been decremented by the instruction calling us
10063 so probe if the size is non-negative to preserve the protection area. */
10064 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10066 /* We expect the registers to be saved when probes are used. */
10067 gcc_assert (int_registers_saved);
10069 if (STACK_CHECK_MOVING_SP)
10071 ix86_adjust_stack_and_probe (allocate);
10076 HOST_WIDE_INT size = allocate;
10078 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10079 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10081 if (TARGET_STACK_PROBE)
10082 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10084 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10090 else if (!ix86_target_stack_probe ()
10091 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10093 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10094 GEN_INT (-allocate), -1,
10095 m->fs.cfa_reg == stack_pointer_rtx);
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn) (rtx, rtx, rtx);

      bool eax_live = false;
      bool r10_live = false;

      if (TARGET_64BIT)
        r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
        eax_live = ix86_eax_live_at_start_p ();

      if (eax_live)
        {
          emit_insn (gen_push (eax));
          allocate -= UNITS_PER_WORD;
        }
      if (r10_live)
        {
          r10 = gen_rtx_REG (Pmode, R10_REG);
          emit_insn (gen_push (r10));
          allocate -= UNITS_PER_WORD;
        }

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (TARGET_64BIT
                           ? gen_pro_epilogue_adjust_stack_di_sub
                           : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
                                           stack_pointer_rtx, eax));

      /* Note that SEH directives need to continue tracking the stack
         pointer even after the frame pointer has been set up.  */
      if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
        {
          if (m->fs.cfa_reg == stack_pointer_rtx)
            m->fs.cfa_offset += allocate;

          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (stack_pointer_rtx,
                                                    -allocate)));
        }
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
        {
          t = choose_baseaddr (m->fs.sp_offset - allocate);
          emit_move_insn (r10, gen_frame_mem (Pmode, t));
          t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
          emit_move_insn (eax, gen_frame_mem (Pmode, t));
        }
      else if (eax_live || r10_live)
        {
          t = choose_baseaddr (m->fs.sp_offset - allocate);
          emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
        }
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
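  /* Editorial sketch (an assumption about typical output, not taken
     from the sources): on 64-bit Windows targets the allocation path
     above emits approximately

         movl  $SIZE, %eax
         call  ___chkstk_ms
         subq  %rax, %rsp

     where the probe worker touches each page of the new area so that
     the guard page is never skipped over.  */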
  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
                            GEN_INT (frame.stack_pointer_offset
                                     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
        m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (frame.nsseregs)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
        SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
        {
          if (ix86_cmodel == CM_LARGE_PIC)
            {
              rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
              rtx label = gen_label_rtx ();
              emit_label (label);
              LABEL_PRESERVE_P (label) = 1;
              gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
              insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
                                                   label));
              insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
              insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                            pic_offset_table_rtx, tmp_reg));
            }
          else
            insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
        }
      else
        {
          insn = emit_insn (gen_set_got (pic_offset_table_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
        }
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across mcount
     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* The vDRAP has been set up, but after reload it turned out that
         stack realignment isn't necessary; emit the prologue code that
         sets up the DRAP without the stack realignment adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }
  /* Prevent instructions from being scheduled into the register save push
     sequence when access to the redzone area is done through the frame
     pointer.  The offset between the frame pointer and the stack pointer
     is calculated relative to the value of the stack pointer at the end
     of the function prologue, and moving instructions that access the
     redzone area via the frame pointer inside the push sequence violates
     this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
         like *(%ebp - 8).  We've just popped that value from
         the stack, which means we need to reset the CFA to
         the drap register.  This will remain until we restore
         the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
        {
          m->fs.cfa_reg = stack_pointer_rtx;
          m->fs.cfa_offset -= UNITS_PER_WORD;

          add_reg_note (insn, REG_CFA_DEF_CFA,
                        gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                      GEN_INT (m->fs.cfa_offset)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
}
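/* Editorial note (illustration only, not from the original sources):
   for a plain "pop %ebp" restore on a 32-bit target, the function above
   attaches a REG_CFA_ADJUST_CFA note equivalent to
   (set (reg sp) (plus (reg sp) (const_int 4))), keeping the unwinder's
   notion of the CFA in step with the popped word.  */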
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    plus_constant (stack_pointer_rtx, m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
      ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
                                 m->fs.fp_offset);
    }
}
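/* Editorial note (illustration only): "leave" is equivalent to
   "mov %?bp, %?sp; pop %?bp", which is why the function above
   revalidates the stack pointer at fp_offset - UNITS_PER_WORD and
   invalidates the frame pointer in the same step.  */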
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
        rtx reg = gen_rtx_REG (Pmode, regno);
        rtx insn, mem;

        mem = choose_baseaddr (cfa_offset);
        mem = gen_frame_mem (Pmode, mem);
        insn = emit_move_insn (reg, mem);

        if (m->fs.cfa_reg == crtl->drap_reg
            && regno == REGNO (crtl->drap_reg))
          {
            /* Previously we'd represented the CFA as an expression
               like *(%ebp - 8).  We've just popped that value from
               the stack, which means we need to reset the CFA to
               the drap register.  This will remain until we restore
               the stack pointer.  */
            add_reg_note (insn, REG_CFA_DEF_CFA, reg);
            RTX_FRAME_RELATED_P (insn) = 1;

            /* This means that the DRAP register is valid for addressing.  */
            m->fs.drap_valid = true;
          }
        else
          ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

        cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
        rtx reg = gen_rtx_REG (V4SFmode, regno);
        rtx mem;

        mem = choose_baseaddr (cfa_offset);
        mem = gen_rtx_MEM (V4SFmode, mem);
        set_mem_align (mem, 128);
        emit_move_insn (reg, mem);

        ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

        cfa_offset -= 16;
      }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
                    || (current_function_sp_is_unchanging
                        && !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
              || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
              || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
         the stack, determine the maximum runtime displacement that
         matches up with the aligned frame.  */
      if (stack_realign_drap)
        m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
                                  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
           && cfun->machine->use_fast_prologue_epilogue
           && (frame.nregs > 1
               || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
           && !frame.nregs
           && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
           && TARGET_USE_LEAVE
           && cfun->machine->use_fast_prologue_epilogue
           && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;
  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
         the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
          && m->fs.sp_offset > 0x7fffffff
          && !(m->fs.fp_valid || m->fs.drap_valid)
          && (frame.nsseregs + frame.nregs) != 0)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (m->fs.sp_offset
                                              - frame.sse_reg_save_offset),
                                     style,
                                     m->fs.cfa_reg == stack_pointer_rtx);
        }
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
                                          style == 2);
  if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
        ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx insn, sa = EH_RETURN_STACKADJ_RTX;

          /* Stack align doesn't work with eh_return.  */
          gcc_assert (!stack_realign_drap);
          /* Neither do regparm nested functions.  */
          gcc_assert (!ix86_static_chain_on_stack);

          if (frame_pointer_needed)
            {
              t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, t));

              t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
              insn = emit_move_insn (hard_frame_pointer_rtx, t);

              /* Note that we use SA as a temporary CFA, as the return
                 address is at the proper place relative to it.  We
                 pretend this happens at the FP restore insn because
                 prior to this insn the FP would be stored at the wrong
                 offset relative to SA, and after this insn we have no
                 other reasonable register to use for the CFA.  We don't
                 bother resetting the CFA to the SP for the duration of
                 the return insn.  */
              add_reg_note (insn, REG_CFA_DEF_CFA,
                            plus_constant (sa, UNITS_PER_WORD));
              ix86_add_queued_cfa_restore_notes (insn);
              add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
              RTX_FRAME_RELATED_P (insn) = 1;

              m->fs.cfa_reg = sa;
              m->fs.cfa_offset = UNITS_PER_WORD;
              m->fs.fp_valid = false;

              pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
                                         const0_rtx, style, false);
            }
          else
            {
              t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
              insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
              ix86_add_queued_cfa_restore_notes (insn);

              gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
              if (m->fs.cfa_offset != UNITS_PER_WORD)
                {
                  m->fs.cfa_offset = UNITS_PER_WORD;
                  add_reg_note (insn, REG_CFA_DEF_CFA,
                                plus_constant (stack_pointer_rtx,
                                               UNITS_PER_WORD));
                  RTX_FRAME_RELATED_P (insn) = 1;
                }
            }
          m->fs.sp_offset = UNITS_PER_WORD;
          m->fs.sp_valid = true;
        }
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
         if necessary, (2) a sequence of pops, and (3) a return or
         jump instruction.  Prevent insns from the function body from
         being scheduled into this sequence.  */
      if (TARGET_SEH)
        {
          /* Prevent a catch region from being adjacent to the standard
             epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda
             nor several other flags that would be interesting to test
             are set up yet.  */
          if (flag_non_call_exceptions)
            emit_insn (gen_nops (const1_rtx));
          else
            emit_insn (gen_blockage ());
        }
      /* First step is to deallocate the stack frame so that we can
         pop the registers.  */
      if (!m->fs.sp_valid)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     GEN_INT (m->fs.fp_offset
                                              - frame.reg_save_offset),
                                     style, false);
        }
      else if (m->fs.sp_offset != frame.reg_save_offset)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (m->fs.sp_offset
                                              - frame.reg_save_offset),
                                     style,
                                     m->fs.cfa_reg == stack_pointer_rtx);
        }

      ix86_emit_restore_regs_using_pop ();
    }
  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
         pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
        ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
         able to grok it fast.  */
      else if (TARGET_USE_LEAVE
               || optimize_function_for_size_p (cfun)
               || !cfun->machine->use_fast_prologue_epilogue)
        ix86_emit_leave ();
      else
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     const0_rtx, style, !using_drap);
          ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
        }
    }
  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
        param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
        param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
                        (VOIDmode, stack_pointer_rtx,
                         gen_rtx_PLUS (Pmode,
                                       crtl->drap_reg,
                                       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
        ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }

  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
                               style, true);
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  /* Emit vzeroupper if needed.  */
  if (TARGET_VZEROUPPER
      && !TREE_THIS_VOLATILE (cfun->decl)
      && !cfun->machine->caller_return_avx256_p)
    emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
         return address, do an explicit add, and jump indirectly to
         the caller.  */
      if (crtl->args.pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, CX_REG);
          rtx insn;

          /* There is no "pascal" calling convention in any 64bit ABI.  */
          gcc_assert (!TARGET_64BIT);

          insn = emit_insn (gen_pop (ecx));
          m->fs.cfa_offset -= UNITS_PER_WORD;
          m->fs.sp_offset -= UNITS_PER_WORD;

          add_reg_note (insn, REG_CFA_ADJUST_CFA,
                        copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
          add_reg_note (insn, REG_CFA_REGISTER,
                        gen_rtx_SET (VOIDmode, ecx, pc_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;

          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     popc, -1, true);
          emit_jump_insn (gen_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
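/* Editorial note (illustration only, not from the original sources):
   for the rare >64K pops_args case the epilogue above ends in roughly

       popl  %ecx        ; return address
       addl  $N, %esp    ; pop the arguments explicitly
       jmp   *%ecx       ; return to caller

   since "ret $N" can only encode a 16-bit immediate.  */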
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    while (insn
           && NOTE_P (insn)
           && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      insn = PREV_INSN (insn);
    if (insn
        && (LABEL_P (insn)
            || (NOTE_P (insn)
                && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
                                       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
                     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
        {
          if (DECL_STATIC_CHAIN (cfun->decl))
            {
              sorry ("-fsplit-stack does not support fastcall with "
                     "nested function");
              return INVALID_REGNUM;
            }
          return AX_REG;
        }
      else if (regparm < 3)
        {
          if (!DECL_STATIC_CHAIN (cfun->decl))
            return CX_REG;
          else
            {
              if (regparm >= 2)
                {
                  sorry ("-fsplit-stack does not support 2 register "
                         "parameters for a nested function");
                  return INVALID_REGNUM;
                }
              return DX_REG;
            }
        }
      else
        {
          /* FIXME: We could make this work by pushing a register
             around the addition and comparison.  */
          sorry ("-fsplit-stack does not support 3 register parameters");
          return INVALID_REGNUM;
        }
    }
}
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;

/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx varargs_label = NULL_RTX;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();
  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                          UNSPEC_STACK_CHECK);
  limit = gen_rtx_CONST (Pmode, limit);
  limit = gen_rtx_MEM (Pmode, limit);
  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
         the required frame size.  Since this is the very start of the
         function, the scratch register can be any caller-saved
         register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
        return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
        {
          /* We don't use ix86_gen_add3 in this case because it will
             want to split to lea, but when not optimizing the insn
             will not be split after this point.  */
          emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                                  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                                offset)));
        }
      else
        {
          emit_move_insn (scratch_reg, offset);
          emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
                                 stack_pointer_rtx));
        }
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_note (jump_insn, REG_BR_PROB,
                GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));

  if (split_stack_fn == NULL_RTX)
    split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
  fn = split_stack_fn;
  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
         Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
        {
          rtx rax;

          rax = gen_rtx_REG (Pmode, AX_REG);
          emit_move_insn (rax, reg10);
          use_reg (&call_fusage, rax);
        }

      if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
        {
          HOST_WIDE_INT argval;

          /* When using the large model we need to load the address
             into a register, and we've run out of registers.  So we
             switch to a different calling convention, and we call a
             different function: __morestack_large.  We pass the
             argument size in the upper 32 bits of r10 and pass the
             frame size in the lower 32 bits.  */
          gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
          gcc_assert ((args_size & 0xffffffff) == args_size);

          if (split_stack_fn_large == NULL_RTX)
            split_stack_fn_large =
              gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");

          if (ix86_cmodel == CM_LARGE_PIC)
            {
              rtx label, x;

              label = gen_label_rtx ();
              emit_label (label);
              LABEL_PRESERVE_P (label) = 1;
              emit_insn (gen_set_rip_rex64 (reg10, label));
              emit_insn (gen_set_got_offset_rex64 (reg11, label));
              emit_insn (gen_adddi3 (reg10, reg10, reg11));
              x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
                                  UNSPEC_GOT);
              x = gen_rtx_CONST (Pmode, x);
              emit_move_insn (reg11, x);
              x = gen_rtx_PLUS (Pmode, reg10, reg11);
              x = gen_const_mem (Pmode, x);
              emit_move_insn (reg11, x);
            }
          else
            emit_move_insn (reg11, split_stack_fn_large);

          fn = reg11;

          argval = ((args_size << 16) << 16) + allocate;
          emit_move_insn (reg10, GEN_INT (argval));
        }
      else
        {
          emit_move_insn (reg10, allocate_rtx);
          emit_move_insn (reg11, GEN_INT (args_size));
          use_reg (&call_fusage, reg11);
        }

      use_reg (&call_fusage, reg10);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
                                GEN_INT (UNITS_PER_WORD), constm1_rtx,
                                NULL_RTX, false);
  add_function_usage_to (call_insn, call_fusage);
  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling _morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
                    gen_rtx_REG (Pmode, AX_REG));
  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
         fp -> old fp value
               return address within this function
               return address of caller of this function
               stack arguments
         So we add three words to get to the stack arguments.

         32-bit:
         fp -> old fp value
               return address within this function
               first argument to __morestack
               second argument to __morestack
               return address of caller of this function
               stack arguments
         So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                              gen_rtx_PLUS (Pmode, frame_reg,
                                            GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
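/* Editorial sketch (an assumption about typical output, not taken from
   the sources): on x86_64 with the small code model the prologue
   emitted above looks approximately like

       cmpq  %fs:0x70, %rsp   ; TCB stack boundary (offset illustrative)
       jae   1f
       movq  $FRAME, %r10
       movq  $ARGS, %r11
       call  __morestack
       ret                    ; consumed by __morestack, see morestack.S
   1:

   so the common-case cost is one compare and one predicted-taken
   branch.  */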
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}

/* Determine if op is a suitable SUBREG RTX for an address.  */

static bool
ix86_address_subreg_operand (rtx op)
{
  enum machine_mode mode;

  if (!REG_P (op))
    return false;

  mode = GET_MODE (op);

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return false;

  /* Don't allow SUBREGs that span more than a word.  It can lead to spill
     failures when the register is one word out of a two word structure.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;

  /* Allow only SUBREGs of non-eliminable hard registers.  */
  return register_no_elim_operand (op, mode);
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of a lea
   instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
          && GET_MODE (XEXP (addr, 0)) == SImode)
        addr = XEXP (addr, 0);
      else if (GET_CODE (addr) == AND
               && const_32bit_mask (XEXP (addr, 1), DImode))
        {
          addr = XEXP (addr, 0);

          /* Strip subreg.  */
          if (GET_CODE (addr) == SUBREG
              && GET_MODE (SUBREG_REG (addr)) == SImode)
            addr = SUBREG_REG (addr);
        }
    }

  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      if (ix86_address_subreg_operand (SUBREG_REG (addr)))
        base = addr;
      else
        return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
        {
          if (n >= 4)
            return 0;
          addends[n++] = XEXP (op, 1);
          op = XEXP (op, 0);
        }
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
        return 0;
      addends[n++] = op;

      for (i = n - 1; i >= 0; --i)
        {
          op = addends[i];
          switch (GET_CODE (op))
            {
            case MULT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);
              break;

            case ASHIFT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              tmp = XEXP (op, 1);
              if (!CONST_INT_P (tmp))
                return 0;
              scale = INTVAL (tmp);
              if ((unsigned HOST_WIDE_INT) scale > 3)
                return 0;
              scale = 1 << scale;
              break;

            case UNSPEC:
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == SEG_DEFAULT)
                seg = TARGET_64BIT ? SEG_FS : SEG_GS;
              else
                return 0;
              break;

            case SUBREG:
              if (!ix86_address_subreg_operand (SUBREG_REG (op)))
                return 0;
              /* FALLTHRU */

            case REG:
              if (!base)
                base = op;
              else if (!index)
                index = op;
              else
                return 0;
              break;

            case CONST:
            case CONST_INT:
            case SYMBOL_REF:
            case LABEL_REF:
              if (disp)
                return 0;
              disp = op;
              break;

            default:
              return 0;
            }
        }
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);          /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;                       /* displacement */

  if (index)
    {
      if (REG_P (index))
        ;
      else if (GET_CODE (index) == SUBREG
               && ix86_address_subreg_operand (SUBREG_REG (index)))
        ;
      else
        return 0;
    }

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
        return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
          || index_reg == frame_pointer_rtx
          || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
      && (base_reg == hard_frame_pointer_rtx
          || base_reg == frame_pointer_rtx
          || base_reg == arg_pointer_rtx
          || (REG_P (base_reg)
              && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
                  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
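/* Editorial note (illustrative examples, not from the original sources):
   the decomposition above maps RTL such as

     (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12))

   to base = %ebx, index = %eax, scale = 4, disp = 12, i.e. the operand
   12(%ebx,%eax,4), while a bare (symbol_ref "x") becomes a pure
   displacement with no base or index.  */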
/* Return the cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoding.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse such addresses
     at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero
     in the memory address, but I don't have an AMD-K6 machine handy to
     check this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
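/* Editorial note (illustration only): under the scheme above an address
   such as 4(%eax,%ebx,2) built from two hard registers costs 1, while
   the same form built from two distinct not-yet-allocated pseudos costs
   3, steering the optimizers toward addresses that tie up fewer
   registers.  */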
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
          && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
        {
          if (!CONST_INT_P (XEXP (x, 1)))
            return false;
          x = XEXP (x, 0);
        }

      if (TARGET_MACHO && darwin_local_data_pic (x))
        return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
        switch (XINT (x, 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_PLTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
          case UNSPEC_NTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_DTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
          default:
            return false;
          }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
        return true;
      if (GET_CODE (x) != SYMBOL_REF)
        return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
        return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
          && SYMBOL_REF_DLLIMPORT_P (x))
        return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
        return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
          && x != CONST0_RTX (TImode)
          && !TARGET_64BIT)
        return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
        return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
          && CONST_INT_P (XEXP (inner, 1)))
        inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_PLTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
            x = XVECEXP (inner, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_MACHOPIC_OFFSET:
            return legitimate_pic_address_disp_p (x);
          default:
            return false;
          }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
        {
        case LABEL_REF:
          return true;

        case CONST:
          if (GET_CODE (XEXP (disp, 0)) != PLUS)
            break;
          op0 = XEXP (XEXP (disp, 0), 0);
          op1 = XEXP (XEXP (disp, 0), 1);
          if (!CONST_INT_P (op1)
              || INTVAL (op1) >= 16*1024*1024
              || INTVAL (op1) < -16*1024*1024)
            break;
          if (GET_CODE (op0) == LABEL_REF)
            return true;
          if (GET_CODE (op0) != SYMBOL_REF)
            break;
          /* FALLTHRU */

        case SYMBOL_REF:
          /* TLS references should always be enclosed in UNSPEC.  */
          if (SYMBOL_REF_TLS_MODEL (op0))
            return false;
          if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
              && ix86_cmodel != CM_LARGE_PIC)
            return true;
          break;

        default:
          break;
        }
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions; this limits the allowed
         distance of GOT table entries.  We should not need these
         anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || (XINT (disp, 1) != UNSPEC_GOTPCREL
              && XINT (disp, 1) != UNSPEC_GOTOFF
              && XINT (disp, 1) != UNSPEC_PCREL
              && XINT (disp, 1) != UNSPEC_PLTOFF))
        return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
        return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
        return false;
      /* We need to check for both symbols and labels because VxWorks loads
         text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
         details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
              || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
         While the ABI also specifies a 32bit relocation, we don't produce
         it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
           || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
          && !TARGET_64BIT)
        return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
        return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS
   should convert common non-canonical forms to canonical form so that
   they will be recognized.  */

static bool
ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                           rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.  */
  if (base)
    {
      rtx reg;

      if (REG_P (base))
        reg = base;
      else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
        reg = SUBREG_REG (base);
      else
        /* Base is not a register.  */
        return false;

      if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
        return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
        /* Base is not valid.  */
        return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg;

      if (REG_P (index))
        reg = index;
      else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
        reg = SUBREG_REG (index);
      else
        /* Index is not a register.  */
        return false;

      if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
        return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
        /* Index is not valid.  */
        return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
        /* Scale without index.  */
        return false;

      if (scale != 2 && scale != 4 && scale != 8)
        /* Scale is not a valid multiplier.  */
        return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC
          && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
        switch (XINT (XEXP (disp, 0), 1))
          {
          /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
             when used.  While the ABI also specifies 32bit relocations,
             we don't produce them at all and use IP relative instead.  */
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
            gcc_assert (flag_pic);
            if (!TARGET_64BIT)
              goto is_legitimate_pic;

            /* 64bit address unspec.  */
            return false;

          case UNSPEC_GOTPCREL:
          case UNSPEC_PCREL:
            gcc_assert (flag_pic);
            goto is_legitimate_pic;

          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
            break;

          case UNSPEC_STACK_CHECK:
            gcc_assert (flag_split_stack);
            break;

          default:
            /* Invalid address unspec.  */
            return false;
          }

      else if (SYMBOLIC_CONST (disp)
               && (flag_pic
                   || (TARGET_MACHO
#if TARGET_MACHO
                       && MACHOPIC_INDIRECT
                       && !machopic_operand_p (disp)
#endif
               )))
        {
        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
                /* Non-constant pic memory reference.  */
                return false;
            }
          else if ((!TARGET_MACHO || flag_pic)
                   && ! legitimate_pic_address_disp_p (disp))
            /* Displacement is an invalid pic construct.  */
            return false;
#if TARGET_MACHO
          else if (MACHO_DYNAMIC_NO_PIC_P
                   && !ix86_legitimate_constant_p (Pmode, disp))
            /* Displacement must be referenced via non_lazy_pointer.  */
            return false;
#endif

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing the
             GOT table with pic_offset_table_rtx as base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to lea in the
             case the output register differs from the input.  While
             this could be handled by a separate addsi pattern for this
             case that never results in lea, disabling this test seems
             to be the easier and correct fix for the crash.  */
        }
      else if (GET_CODE (disp) != LABEL_REF
               && !CONST_INT_P (disp)
               && (GET_CODE (disp) != CONST
                   || !ix86_legitimate_constant_p (Pmode, disp))
               && (GET_CODE (disp) != SYMBOL_REF
                   || !ix86_legitimate_constant_p (Pmode, disp)))
        /* Displacement is not constant.  */
        return false;
      else if (TARGET_64BIT
               && !x86_64_immediate_operand (disp, VOIDmode))
        /* Displacement is out of range.  */
        return false;
    }

  /* Everything looks valid.  */
  return true;
}
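/* Editorial note (illustrative summary, not from the original sources):
   the predicate above accepts the canonical x86 forms
   base + index*scale + disp with scale in {1, 2, 4, 8}, e.g.
   (plus (reg %ebx) (const_int 8)) for 8(%ebx), and rejects forms such
   as a scale of 3 or a TLS symbol folded into the displacement outside
   an UNSPEC wrapper.  */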
/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}

/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
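/* Editorial sketch (an assumption about typical output, not taken from
   the sources): the two reference kinds described in the comment below
   correspond to 32-bit code sequences of roughly

       movl  x@GOT(%ebx), %eax      ; global data: load address from GOT
       leal  x@GOTOFF(%ebx), %eax   ; local data: PIC register + offset

   where %ebx holds the GOT base.  */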
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */
static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;
  rtx base;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
        reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if (TARGET_64BIT
           && ix86_cmodel != CM_SMALL_PIC
           && gotoff_operand (addr, Pmode))
    {
      rtx tmpreg;

      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      tmpreg = gen_reg_rtx (Pmode);
      emit_move_insn (tmpreg, new_rtx);

      if (reg != 0)
        {
          new_rtx = expand_simple_binop (Pmode, PLUS, reg,
                                         pic_offset_table_rtx,
                                         tmpreg, 1, OPTAB_DIRECT);
          new_rtx = reg;
        }
      else
        new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

      if (reg != 0)
        {
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
           /* We can't use @GOTOFF for text labels on VxWorks;
              see gotoff_operand.  */
           || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
        {
          if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
            return legitimize_dllimport_symbol (addr, true);
          if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
              && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
            {
              rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0),
                                                   true);
              return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
            }
        }

      /* For x64 PE-COFF there is no GOT table.  So we use the address
         directly.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
                                    UNSPEC_GOTPCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use gen_movsi directly, otherwise the address is loaded
             into a register for CSE.  We don't want to CSE these
             addresses; instead we CSE addresses from the GOT table,
             so skip this.  */
          emit_insn (gen_movsi (reg, new_rtx));
          new_rtx = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          if (reload_in_progress)
            df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          if (TARGET_64BIT)
            new_rtx = force_reg (Pmode, new_rtx);
          new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
    }
  else
    {
      if (CONST_INT_P (addr)
          && !x86_64_immediate_operand (addr, VOIDmode))
        {
          if (reg)
            {
              emit_move_insn (reg, addr);
              new_rtx = reg;
            }
          else
            new_rtx = force_reg (Pmode, addr);
        }
      else if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway....  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          gcc_assert (GET_CODE (addr) == PLUS);
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (gotoff_operand (op0, Pmode)
              && CONST_INT_P (op1))
            {
              if (!TARGET_64BIT)
                {
                  if (reload_in_progress)
                    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
                  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                            UNSPEC_GOTOFF);
                  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
                  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
                  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
                                          new_rtx);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new_rtx);
                      new_rtx = reg;
                    }
                }
              else
                {
                  if (INTVAL (op1) < -16*1024*1024
                      || INTVAL (op1) >= 16*1024*1024)
                    {
                      if (!x86_64_immediate_operand (op1, Pmode))
                        op1 = force_reg (Pmode, op1);
                      new_rtx = gen_rtx_PLUS (Pmode,
                                              force_reg (Pmode, op0), op1);
                    }
                }
            }
          else
            {
              base = legitimize_pic_address (XEXP (addr, 0), reg);
              new_rtx = legitimize_pic_address (XEXP (addr, 1),
                                                base == reg ? NULL_RTX : reg);

              if (CONST_INT_P (new_rtx))
                new_rtx = plus_constant (base, INTVAL (new_rtx));
              else
                {
                  if (GET_CODE (new_rtx) == PLUS
                      && CONSTANT_P (XEXP (new_rtx, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
                      new_rtx = XEXP (new_rtx, 1);
                    }
                  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
                }
            }
        }
    }
  return new_rtx;
}
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != Pmode)
    tp = convert_to_mode (Pmode, tp, 1);
  if (to_reg)
    tp = copy_addr_to_reg (tp);

  return tp;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
        = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
           ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  return ix86_tls_symbol;
}

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

static rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
        = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
        |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
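/* Editorial sketch (an assumption about typical output, not taken from
   the sources): the TLS models handled below correspond roughly to
   these 64-bit sequences:

     global dynamic:  lea x@tlsgd(%rip), %rdi; call __tls_get_addr
     local dynamic:   lea x@tlsld(%rip), %rdi; call __tls_get_addr;
                      lea x@dtpoff(%rax), %rax
     initial exec:    mov x@gottpoff(%rip), %rax; mov %fs:(%rax), ...
     local exec:      mov %fs:x@tpoff, ...

   The cheaper models become usable as more is known at link time.  */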
12228 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12229 false if we expect this to be used for a memory address and true if
12230 we expect to load the address into a register. */
12233 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12235 rtx dest, base, off;
12236 rtx pic = NULL_RTX, tp = NULL_RTX;
12241 case TLS_MODEL_GLOBAL_DYNAMIC:
12242 dest = gen_reg_rtx (Pmode);
12247 pic = pic_offset_table_rtx;
12250 pic = gen_reg_rtx (Pmode);
12251 emit_insn (gen_set_got (pic));
12255 if (TARGET_GNU2_TLS)
12258 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12260 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12262 tp = get_thread_pointer (true);
12263 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12265 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12269 rtx caddr = ix86_tls_get_addr ();
12273 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12276 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12277 insns = get_insns ();
12280 RTL_CONST_CALL_P (insns) = 1;
12281 emit_libcall_block (insns, dest, rax, x);
12284 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12288 case TLS_MODEL_LOCAL_DYNAMIC:
12289 base = gen_reg_rtx (Pmode);
12294 pic = pic_offset_table_rtx;
12297 pic = gen_reg_rtx (Pmode);
12298 emit_insn (gen_set_got (pic));
12302 if (TARGET_GNU2_TLS)
12304 rtx tmp = ix86_tls_module_base ();
12307 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12309 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12311 tp = get_thread_pointer (true);
12312 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12313 gen_rtx_MINUS (Pmode, tmp, tp));
12317 rtx caddr = ix86_tls_get_addr ();
12321 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12324 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12325 insns = get_insns ();
12328 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
12329 share the LD_BASE result with other LD model accesses. */
12330 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12331 UNSPEC_TLS_LD_BASE);
12333 RTL_CONST_CALL_P (insns) = 1;
12334 emit_libcall_block (insns, base, rax, eqv);
12337 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12340 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12341 off = gen_rtx_CONST (Pmode, off);
12343 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12345 if (TARGET_GNU2_TLS)
12347 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12349 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12353 case TLS_MODEL_INITIAL_EXEC:
12356 if (TARGET_SUN_TLS)
12358 /* The Sun linker took the AMD64 TLS spec literally
12359 and can only handle %rax as the destination of the
12360 initial executable code sequence. */
12362 dest = gen_reg_rtx (Pmode);
12363 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12368 type = UNSPEC_GOTNTPOFF;
12372 if (reload_in_progress)
12373 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12374 pic = pic_offset_table_rtx;
12375 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12377 else if (!TARGET_ANY_GNU_TLS)
12379 pic = gen_reg_rtx (Pmode);
12380 emit_insn (gen_set_got (pic));
12381 type = UNSPEC_GOTTPOFF;
12386 type = UNSPEC_INDNTPOFF;
12389 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12390 off = gen_rtx_CONST (Pmode, off);
12392 off = gen_rtx_PLUS (Pmode, pic, off);
12393 off = gen_const_mem (Pmode, off);
12394 set_mem_alias_set (off, ix86_GOT_alias_set ());
12396 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12398 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12399 off = force_reg (Pmode, off);
12400 return gen_rtx_PLUS (Pmode, base, off);
12404 base = get_thread_pointer (true);
12405 dest = gen_reg_rtx (Pmode);
12406 emit_insn (gen_subsi3 (dest, base, off));
12410 case TLS_MODEL_LOCAL_EXEC:
12411 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12412 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12413 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12414 off = gen_rtx_CONST (Pmode, off);
12416 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12418 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12419 return gen_rtx_PLUS (Pmode, base, off);
12423 base = get_thread_pointer (true);
12424 dest = gen_reg_rtx (Pmode);
12425 emit_insn (gen_subsi3 (dest, base, off));
12430 gcc_unreachable ();
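/* For illustration, a minimal user-level sketch (hypothetical names,
   kept inside #if 0) of the accesses the models above implement; which
   model is chosen depends on -fpic, -ftls-model and whether the code
   is destined for a shared object.  */
#if 0
__thread int tls_counter;

int
bump_tls_counter (void)
{
  /* With the local-exec model on IA-32 this typically becomes
       movl %gs:tls_counter@ntpoff, %eax
     matching the UNSPEC_NTPOFF path above.  */
  return ++tls_counter;
}
#endif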
12436 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12439 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12440 htab_t dllimport_map;
12443 get_dllimport_decl (tree decl)
12445 struct tree_map *h, in;
12448 const char *prefix;
12449 size_t namelen, prefixlen;
12454 if (!dllimport_map)
12455 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12457 in.hash = htab_hash_pointer (decl);
12458 in.base.from = decl;
12459 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12460 h = (struct tree_map *) *loc;
12464 *loc = h = ggc_alloc_tree_map ();
12466 h->base.from = decl;
12467 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12468 VAR_DECL, NULL, ptr_type_node);
12469 DECL_ARTIFICIAL (to) = 1;
12470 DECL_IGNORED_P (to) = 1;
12471 DECL_EXTERNAL (to) = 1;
12472 TREE_READONLY (to) = 1;
12474 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12475 name = targetm.strip_name_encoding (name);
12476 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12477 ? "*__imp_" : "*__imp__";
12478 namelen = strlen (name);
12479 prefixlen = strlen (prefix);
12480 imp_name = (char *) alloca (namelen + prefixlen + 1);
12481 memcpy (imp_name, prefix, prefixlen);
12482 memcpy (imp_name + prefixlen, name, namelen + 1);
12484 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12485 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12486 SET_SYMBOL_REF_DECL (rtl, to);
12487 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12489 rtl = gen_const_mem (Pmode, rtl);
12490 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12492 SET_DECL_RTL (to, rtl);
12493 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
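/* For illustration: given a decl named "foo", the code above builds the
   import-pointer name "*__imp__foo" ("*__imp_foo" when there is no user
   label prefix or the name carries the fastcall prefix), and the decl's
   RTL becomes a load through that pointer, roughly
     (mem:SI (symbol_ref "__imp__foo")).  */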
12498 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12499 true if we require the result be a register. */
12502 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12507 gcc_assert (SYMBOL_REF_DECL (symbol));
12508 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12510 x = DECL_RTL (imp_decl);
12512 x = force_reg (Pmode, x);
12516 /* Try machine-dependent ways of modifying an illegitimate address
12517 to be legitimate. If we find one, return the new, valid address.
12518 This macro is used in only one place: `memory_address' in explow.c.
12520 OLDX is the address as it was before break_out_memory_refs was called.
12521 In some cases it is useful to look at this to decide what needs to be done.
12523 It is always safe for this macro to do nothing. It exists to recognize
12524 opportunities to optimize the output.
12526 For the 80386, we handle X+REG by loading X into a register R and
12527 using R+REG. R will go in a general reg and indexing will be used.
12528 However, if REG is a broken-out memory address or multiplication,
12529 nothing needs to be done because REG can certainly go in a general reg.
12531 When -fpic is used, special handling is needed for symbolic references.
12532 See comments by legitimize_pic_address in i386.c for details. */
12535 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12536 enum machine_mode mode)
12541 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12543 return legitimize_tls_address (x, (enum tls_model) log, false);
12544 if (GET_CODE (x) == CONST
12545 && GET_CODE (XEXP (x, 0)) == PLUS
12546 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12547 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12549 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12550 (enum tls_model) log, false);
12551 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12554 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12556 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12557 return legitimize_dllimport_symbol (x, true);
12558 if (GET_CODE (x) == CONST
12559 && GET_CODE (XEXP (x, 0)) == PLUS
12560 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12561 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12563 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12564 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12568 if (flag_pic && SYMBOLIC_CONST (x))
12569 return legitimize_pic_address (x, 0);
12572 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12573 return machopic_indirect_data_reference (x, 0);
12576 /* Canonicalize shifts by 0, 1, 2, 3 into a multiply. */
12577 if (GET_CODE (x) == ASHIFT
12578 && CONST_INT_P (XEXP (x, 1))
12579 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12582 log = INTVAL (XEXP (x, 1));
12583 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12584 GEN_INT (1 << log));
12587 if (GET_CODE (x) == PLUS)
12589 /* Canonicalize shifts by 0, 1, 2, 3 into a multiply. */
12591 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12592 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12593 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12596 log = INTVAL (XEXP (XEXP (x, 0), 1));
12597 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12598 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12599 GEN_INT (1 << log));
12602 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12603 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12604 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12607 log = INTVAL (XEXP (XEXP (x, 1), 1));
12608 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12609 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12610 GEN_INT (1 << log));
12613 /* Put multiply first if it isn't already. */
12614 if (GET_CODE (XEXP (x, 1)) == MULT)
12616 rtx tmp = XEXP (x, 0);
12617 XEXP (x, 0) = XEXP (x, 1);
12622 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12623 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12624 created by virtual register instantiation, register elimination, and
12625 similar optimizations. */
12626 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12629 x = gen_rtx_PLUS (Pmode,
12630 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12631 XEXP (XEXP (x, 1), 0)),
12632 XEXP (XEXP (x, 1), 1));
12636 /* Canonicalize (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12637 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12638 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12639 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12640 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12641 && CONSTANT_P (XEXP (x, 1)))
12644 rtx other = NULL_RTX;
12646 if (CONST_INT_P (XEXP (x, 1)))
12648 constant = XEXP (x, 1);
12649 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12651 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12653 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12654 other = XEXP (x, 1);
12662 x = gen_rtx_PLUS (Pmode,
12663 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12664 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12665 plus_constant (other, INTVAL (constant)));
12669 if (changed && ix86_legitimate_address_p (mode, x, false))
12672 if (GET_CODE (XEXP (x, 0)) == MULT)
12675 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12678 if (GET_CODE (XEXP (x, 1)) == MULT)
12681 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12685 && REG_P (XEXP (x, 1))
12686 && REG_P (XEXP (x, 0)))
12689 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12692 x = legitimize_pic_address (x, 0);
12695 if (changed && ix86_legitimate_address_p (mode, x, false))
12698 if (REG_P (XEXP (x, 0)))
12700 rtx temp = gen_reg_rtx (Pmode);
12701 rtx val = force_operand (XEXP (x, 1), temp);
12704 if (GET_MODE (val) != Pmode)
12705 val = convert_to_mode (Pmode, val, 1);
12706 emit_move_insn (temp, val);
12709 XEXP (x, 1) = temp;
12713 else if (REG_P (XEXP (x, 1)))
12715 rtx temp = gen_reg_rtx (Pmode);
12716 rtx val = force_operand (XEXP (x, 0), temp);
12719 if (GET_MODE (val) != Pmode)
12720 val = convert_to_mode (Pmode, val, 1);
12721 emit_move_insn (temp, val);
12724 XEXP (x, 0) = temp;
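/* For illustration, one of the canonicalizations above in RTL form:
     (plus (ashift (reg) (const_int 2)) (reg))
   becomes
     (plus (mult (reg) (const_int 4)) (reg))
   which matches the base + index*scale shape that
   ix86_legitimate_address_p accepts.  */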
12732 /* Print an integer constant expression in assembler syntax. Addition
12733 and subtraction are the only arithmetic that may appear in these
12734 expressions. FILE is the stdio stream to write to, X is the rtx, and
12735 CODE is the operand print code from the output string. */
12738 output_pic_addr_const (FILE *file, rtx x, int code)
12742 switch (GET_CODE (x))
12745 gcc_assert (flag_pic);
12750 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12751 output_addr_const (file, x);
12754 const char *name = XSTR (x, 0);
12756 /* Mark the decl as referenced so that cgraph will
12757 output the function. */
12758 if (SYMBOL_REF_DECL (x))
12759 mark_decl_referenced (SYMBOL_REF_DECL (x));
12762 if (MACHOPIC_INDIRECT
12763 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12764 name = machopic_indirection_name (x, /*stub_p=*/true);
12766 assemble_name (file, name);
12768 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12769 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12770 fputs ("@PLT", file);
12777 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12778 assemble_name (asm_out_file, buf);
12782 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12786 /* This used to output parentheses around the expression,
12787 but that does not work on the 386 (either ATT or BSD assembler). */
12788 output_pic_addr_const (file, XEXP (x, 0), code);
12792 if (GET_MODE (x) == VOIDmode)
12794 /* We can use %d if the number is <32 bits and positive. */
12795 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12796 fprintf (file, "0x%lx%08lx",
12797 (unsigned long) CONST_DOUBLE_HIGH (x),
12798 (unsigned long) CONST_DOUBLE_LOW (x));
12800 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12803 /* We can't handle floating point constants;
12804 TARGET_PRINT_OPERAND must handle them. */
12805 output_operand_lossage ("floating constant misused");
12809 /* Some assemblers need integer constants to appear first. */
12810 if (CONST_INT_P (XEXP (x, 0)))
12812 output_pic_addr_const (file, XEXP (x, 0), code);
12814 output_pic_addr_const (file, XEXP (x, 1), code);
12818 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12819 output_pic_addr_const (file, XEXP (x, 1), code);
12821 output_pic_addr_const (file, XEXP (x, 0), code);
12827 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12828 output_pic_addr_const (file, XEXP (x, 0), code);
12830 output_pic_addr_const (file, XEXP (x, 1), code);
12832 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12836 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12838 bool f = i386_asm_output_addr_const_extra (file, x);
12843 gcc_assert (XVECLEN (x, 0) == 1);
12844 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12845 switch (XINT (x, 1))
12848 fputs ("@GOT", file);
12850 case UNSPEC_GOTOFF:
12851 fputs ("@GOTOFF", file);
12853 case UNSPEC_PLTOFF:
12854 fputs ("@PLTOFF", file);
12857 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12858 "(%rip)" : "[rip]", file);
12860 case UNSPEC_GOTPCREL:
12861 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12862 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12864 case UNSPEC_GOTTPOFF:
12865 /* FIXME: This might be @TPOFF in Sun ld too. */
12866 fputs ("@gottpoff", file);
12869 fputs ("@tpoff", file);
12871 case UNSPEC_NTPOFF:
12873 fputs ("@tpoff", file);
12875 fputs ("@ntpoff", file);
12877 case UNSPEC_DTPOFF:
12878 fputs ("@dtpoff", file);
12880 case UNSPEC_GOTNTPOFF:
12882 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12883 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12885 fputs ("@gotntpoff", file);
12887 case UNSPEC_INDNTPOFF:
12888 fputs ("@indntpoff", file);
12891 case UNSPEC_MACHOPIC_OFFSET:
12893 machopic_output_function_base_name (file);
12897 output_operand_lossage ("invalid UNSPEC as operand");
12903 output_operand_lossage ("invalid expression as operand");
12907 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12908 We need to emit DTP-relative relocations. */
12910 static void ATTRIBUTE_UNUSED
12911 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12913 fputs (ASM_LONG, file);
12914 output_addr_const (file, x);
12915 fputs ("@dtpoff", file);
12921 fputs (", 0", file);
12924 gcc_unreachable ();
12928 /* Return true if X is a representation of the PIC register. This copes
12929 with calls from ix86_find_base_term, where the register might have
12930 been replaced by a cselib value. */
12933 ix86_pic_register_p (rtx x)
12935 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12936 return (pic_offset_table_rtx
12937 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12939 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12942 /* Helper function for ix86_delegitimize_address.
12943 Attempt to delegitimize TLS local-exec accesses. */
12946 ix86_delegitimize_tls_address (rtx orig_x)
12948 rtx x = orig_x, unspec;
12949 struct ix86_address addr;
12951 if (!TARGET_TLS_DIRECT_SEG_REFS)
12955 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12957 if (ix86_decompose_address (x, &addr) == 0
12958 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12959 || addr.disp == NULL_RTX
12960 || GET_CODE (addr.disp) != CONST)
12962 unspec = XEXP (addr.disp, 0);
12963 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12964 unspec = XEXP (unspec, 0);
12965 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12967 x = XVECEXP (unspec, 0, 0);
12968 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12969 if (unspec != XEXP (addr.disp, 0))
12970 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12973 rtx idx = addr.index;
12974 if (addr.scale != 1)
12975 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12976 x = gen_rtx_PLUS (Pmode, idx, x);
12979 x = gen_rtx_PLUS (Pmode, addr.base, x);
12980 if (MEM_P (orig_x))
12981 x = replace_equiv_address_nv (orig_x, x);
12985 /* In the name of slightly smaller debug output, and to cater to
12986 general assembler lossage, recognize PIC+GOTOFF and turn it back
12987 into a direct symbol reference.
12989 On Darwin, this is necessary to avoid a crash, because Darwin
12990 has a different PIC label for each routine but the DWARF debugging
12991 information is not associated with any particular routine, so it's
12992 necessary to remove references to the PIC label from RTL stored by
12993 the DWARF output code. */
12996 ix86_delegitimize_address (rtx x)
12998 rtx orig_x = delegitimize_mem_from_attrs (x);
12999 /* addend is NULL or some rtx if x is something+GOTOFF where
13000 something doesn't include the PIC register. */
13001 rtx addend = NULL_RTX;
13002 /* reg_addend is NULL or a multiple of some register. */
13003 rtx reg_addend = NULL_RTX;
13004 /* const_addend is NULL or a const_int. */
13005 rtx const_addend = NULL_RTX;
13006 /* This is the result, or NULL. */
13007 rtx result = NULL_RTX;
13016 if (GET_CODE (x) != CONST
13017 || GET_CODE (XEXP (x, 0)) != UNSPEC
13018 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13019 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13020 || !MEM_P (orig_x))
13021 return ix86_delegitimize_tls_address (orig_x);
13022 x = XVECEXP (XEXP (x, 0), 0, 0);
13023 if (GET_MODE (orig_x) != GET_MODE (x))
13025 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13033 if (GET_CODE (x) != PLUS
13034 || GET_CODE (XEXP (x, 1)) != CONST)
13035 return ix86_delegitimize_tls_address (orig_x);
13037 if (ix86_pic_register_p (XEXP (x, 0)))
13038 /* %ebx + GOT/GOTOFF */
13040 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13042 /* %ebx + %reg * scale + GOT/GOTOFF */
13043 reg_addend = XEXP (x, 0);
13044 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13045 reg_addend = XEXP (reg_addend, 1);
13046 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13047 reg_addend = XEXP (reg_addend, 0);
13050 reg_addend = NULL_RTX;
13051 addend = XEXP (x, 0);
13055 addend = XEXP (x, 0);
13057 x = XEXP (XEXP (x, 1), 0);
13058 if (GET_CODE (x) == PLUS
13059 && CONST_INT_P (XEXP (x, 1)))
13061 const_addend = XEXP (x, 1);
13065 if (GET_CODE (x) == UNSPEC
13066 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13067 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13068 result = XVECEXP (x, 0, 0);
13070 if (TARGET_MACHO && darwin_local_data_pic (x)
13071 && !MEM_P (orig_x))
13072 result = XVECEXP (x, 0, 0);
13075 return ix86_delegitimize_tls_address (orig_x);
13078 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13080 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13083 /* If the rest of original X doesn't involve the PIC register, add
13084 addend and subtract pic_offset_table_rtx. This can happen e.g.
13086 leal (%ebx, %ecx, 4), %ecx
13088 movl foo@GOTOFF(%ecx), %edx
13089 in which case we return (%ecx - %ebx) + foo. */
13090 if (pic_offset_table_rtx)
13091 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13092 pic_offset_table_rtx),
13097 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13099 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13100 if (result == NULL_RTX)
13106 /* If X is a machine specific address (i.e. a symbol or label being
13107 referenced as a displacement from the GOT implemented using an
13108 UNSPEC), then return the base term. Otherwise return X. */
13111 ix86_find_base_term (rtx x)
13117 if (GET_CODE (x) != CONST)
13119 term = XEXP (x, 0);
13120 if (GET_CODE (term) == PLUS
13121 && (CONST_INT_P (XEXP (term, 1))
13122 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13123 term = XEXP (term, 0);
13124 if (GET_CODE (term) != UNSPEC
13125 || (XINT (term, 1) != UNSPEC_GOTPCREL
13126 && XINT (term, 1) != UNSPEC_PCREL))
13129 return XVECEXP (term, 0, 0);
13132 return ix86_delegitimize_address (x);
13136 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13137 int fp, FILE *file)
13139 const char *suffix;
13141 if (mode == CCFPmode || mode == CCFPUmode)
13143 code = ix86_fp_compare_code_to_integer (code);
13147 code = reverse_condition (code);
13198 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13202 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13203 Those same assemblers have the same but opposite lossage on cmov. */
13204 if (mode == CCmode)
13205 suffix = fp ? "nbe" : "a";
13206 else if (mode == CCCmode)
13209 gcc_unreachable ();
13225 gcc_unreachable ();
13229 gcc_assert (mode == CCmode || mode == CCCmode);
13246 gcc_unreachable ();
13250 /* ??? As above. */
13251 gcc_assert (mode == CCmode || mode == CCCmode);
13252 suffix = fp ? "nb" : "ae";
13255 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13259 /* ??? As above. */
13260 if (mode == CCmode)
13262 else if (mode == CCCmode)
13263 suffix = fp ? "nb" : "ae";
13265 gcc_unreachable ();
13268 suffix = fp ? "u" : "p";
13271 suffix = fp ? "nu" : "np";
13274 gcc_unreachable ();
13276 fputs (suffix, file);
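/* For illustration (assuming the usual mapping): an unsigned
   greater-than in CCmode prints "a" ("nbe" in the fcmov case, per the
   workaround above) and unsigned greater-or-equal prints "ae"/"nb";
   the caller appends the suffix to "set", "cmov" or "j".  */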
13279 /* Print the name of register X to FILE based on its machine mode and number.
13280 If CODE is 'w', pretend the mode is HImode.
13281 If CODE is 'b', pretend the mode is QImode.
13282 If CODE is 'k', pretend the mode is SImode.
13283 If CODE is 'q', pretend the mode is DImode.
13284 If CODE is 'x', pretend the mode is V4SFmode.
13285 If CODE is 't', pretend the mode is V8SFmode.
13286 If CODE is 'h', pretend the reg is the 'high' byte register.
13287 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13288 If CODE is 'd', duplicate the operand for AVX instruction.
13292 print_reg (rtx x, int code, FILE *file)
13295 bool duplicated = code == 'd' && TARGET_AVX;
13297 gcc_assert (x == pc_rtx
13298 || (REGNO (x) != ARG_POINTER_REGNUM
13299 && REGNO (x) != FRAME_POINTER_REGNUM
13300 && REGNO (x) != FLAGS_REG
13301 && REGNO (x) != FPSR_REG
13302 && REGNO (x) != FPCR_REG));
13304 if (ASSEMBLER_DIALECT == ASM_ATT)
13309 gcc_assert (TARGET_64BIT);
13310 fputs ("rip", file);
13314 if (code == 'w' || MMX_REG_P (x))
13316 else if (code == 'b')
13318 else if (code == 'k')
13320 else if (code == 'q')
13322 else if (code == 'y')
13324 else if (code == 'h')
13326 else if (code == 'x')
13328 else if (code == 't')
13331 code = GET_MODE_SIZE (GET_MODE (x));
13333 /* Irritatingly, AMD extended registers use a different naming convention
13334 from the normal registers. */
13335 if (REX_INT_REG_P (x))
13337 gcc_assert (TARGET_64BIT);
13341 error ("extended registers have no high halves");
13344 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13347 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13350 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13353 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13356 error ("unsupported operand size for extended register");
13366 if (STACK_TOP_P (x))
13375 if (! ANY_FP_REG_P (x))
13376 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13381 reg = hi_reg_name[REGNO (x)];
13384 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13386 reg = qi_reg_name[REGNO (x)];
13389 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13391 reg = qi_high_reg_name[REGNO (x)];
13396 gcc_assert (!duplicated);
13398 fputs (hi_reg_name[REGNO (x)] + 1, file);
13403 gcc_unreachable ();
13409 if (ASSEMBLER_DIALECT == ASM_ATT)
13410 fprintf (file, ", %%%s", reg);
13412 fprintf (file, ", %s", reg);
13416 /* Locate some local-dynamic symbol still in use by this function
13417 so that we can print its name in some tls_local_dynamic_base pattern. */
13421 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13425 if (GET_CODE (x) == SYMBOL_REF
13426 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13428 cfun->machine->some_ld_name = XSTR (x, 0);
13435 static const char *
13436 get_some_local_dynamic_name (void)
13440 if (cfun->machine->some_ld_name)
13441 return cfun->machine->some_ld_name;
13443 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13444 if (NONDEBUG_INSN_P (insn)
13445 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13446 return cfun->machine->some_ld_name;
13451 /* Meaning of CODE:
13452 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13453 C -- print opcode suffix for set/cmov insn.
13454 c -- like C, but print reversed condition
13455 F,f -- likewise, but for floating-point.
13456 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13458 R -- print the prefix for register names.
13459 z -- print the opcode suffix for the size of the current operand.
13460 Z -- likewise, with special suffixes for x87 instructions.
13461 * -- print a star (in certain assembler syntax)
13462 A -- print an absolute memory reference.
13463 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13464 s -- print a shift double count, followed by the assembler's argument
13466 b -- print the QImode name of the register for the indicated operand.
13467 %b0 would print %al if operands[0] is reg 0.
13468 w -- likewise, print the HImode name of the register.
13469 k -- likewise, print the SImode name of the register.
13470 q -- likewise, print the DImode name of the register.
13471 x -- likewise, print the V4SFmode name of the register.
13472 t -- likewise, print the V8SFmode name of the register.
13473 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13474 y -- print "st(0)" instead of "st" as a register.
13475 d -- print duplicated register operand for AVX instruction.
13476 D -- print condition for SSE cmp instruction.
13477 P -- if PIC, print an @PLT suffix.
13478 p -- print raw symbol name.
13479 X -- don't print any sort of PIC '@' suffix for a symbol.
13480 & -- print some in-use local-dynamic symbol name.
13481 H -- print a memory address offset by 8; used for sse high-parts
13482 Y -- print condition for XOP pcom* instruction.
13483 + -- print a branch hint as a 'cs' or 'ds' prefix
13484 ; -- print a semicolon (after prefixes due to a bug in older gas).
13485 @ -- print the segment register of a thread base pointer load
13489 ix86_print_operand (FILE *file, rtx x, int code)
13496 if (ASSEMBLER_DIALECT == ASM_ATT)
13502 const char *name = get_some_local_dynamic_name ();
13504 output_operand_lossage ("'%%&' used without any "
13505 "local dynamic TLS references");
13507 assemble_name (file, name);
13512 switch (ASSEMBLER_DIALECT)
13519 /* Intel syntax. For absolute addresses, registers should not
13520 be surrounded by braces. */
13524 ix86_print_operand (file, x, 0);
13531 gcc_unreachable ();
13534 ix86_print_operand (file, x, 0);
13539 if (ASSEMBLER_DIALECT == ASM_ATT)
13544 if (ASSEMBLER_DIALECT == ASM_ATT)
13549 if (ASSEMBLER_DIALECT == ASM_ATT)
13554 if (ASSEMBLER_DIALECT == ASM_ATT)
13559 if (ASSEMBLER_DIALECT == ASM_ATT)
13564 if (ASSEMBLER_DIALECT == ASM_ATT)
13569 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13571 /* Opcodes don't get size suffixes when using Intel syntax. */
13572 if (ASSEMBLER_DIALECT == ASM_INTEL)
13575 switch (GET_MODE_SIZE (GET_MODE (x)))
13594 output_operand_lossage
13595 ("invalid operand size for operand code '%c'", code);
13600 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13602 (0, "non-integer operand used with operand code '%c'", code);
13606 /* 387 opcodes don't get size suffixes when using Intel syntax. */
13607 if (ASSEMBLER_DIALECT == ASM_INTEL)
13610 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13612 switch (GET_MODE_SIZE (GET_MODE (x)))
13615 #ifdef HAVE_AS_IX86_FILDS
13625 #ifdef HAVE_AS_IX86_FILDQ
13628 fputs ("ll", file);
13636 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13638 /* 387 opcodes don't get size suffixes
13639 if the operands are registers. */
13640 if (STACK_REG_P (x))
13643 switch (GET_MODE_SIZE (GET_MODE (x)))
13664 output_operand_lossage
13665 ("invalid operand type used with operand code '%c'", code);
13669 output_operand_lossage
13670 ("invalid operand size for operand code '%c'", code);
13688 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13690 ix86_print_operand (file, x, 0);
13691 fputs (", ", file);
13696 /* Little bit of braindamage here. The SSE compare instructions
13697 use completely different names for the comparisons than the
13698 fp conditional moves do. */
13701 switch (GET_CODE (x))
13704 fputs ("eq", file);
13707 fputs ("eq_us", file);
13710 fputs ("lt", file);
13713 fputs ("nge", file);
13716 fputs ("le", file);
13719 fputs ("ngt", file);
13722 fputs ("unord", file);
13725 fputs ("neq", file);
13728 fputs ("neq_oq", file);
13731 fputs ("ge", file);
13734 fputs ("nlt", file);
13737 fputs ("gt", file);
13740 fputs ("nle", file);
13743 fputs ("ord", file);
13746 output_operand_lossage ("operand is not a condition code, "
13747 "invalid operand code 'D'");
13753 switch (GET_CODE (x))
13757 fputs ("eq", file);
13761 fputs ("lt", file);
13765 fputs ("le", file);
13768 fputs ("unord", file);
13772 fputs ("neq", file);
13776 fputs ("nlt", file);
13780 fputs ("nle", file);
13783 fputs ("ord", file);
13786 output_operand_lossage ("operand is not a condition code, "
13787 "invalid operand code 'D'");
13793 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13794 if (ASSEMBLER_DIALECT == ASM_ATT)
13796 switch (GET_MODE (x))
13798 case HImode: putc ('w', file); break;
13800 case SFmode: putc ('l', file); break;
13802 case DFmode: putc ('q', file); break;
13803 default: gcc_unreachable ();
13810 if (!COMPARISON_P (x))
13812 output_operand_lossage ("operand is neither a constant nor a "
13813 "condition code, invalid operand code "
13817 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13820 if (!COMPARISON_P (x))
13822 output_operand_lossage ("operand is neither a constant nor a "
13823 "condition code, invalid operand code "
13827 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13828 if (ASSEMBLER_DIALECT == ASM_ATT)
13831 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13834 /* Like above, but reverse condition */
13836 /* Check to see if argument to %c is really a constant
13837 and not a condition code which needs to be reversed. */
13838 if (!COMPARISON_P (x))
13840 output_operand_lossage ("operand is neither a constant nor a "
13841 "condition code, invalid operand "
13845 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13848 if (!COMPARISON_P (x))
13850 output_operand_lossage ("operand is neither a constant nor a "
13851 "condition code, invalid operand "
13855 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13856 if (ASSEMBLER_DIALECT == ASM_ATT)
13859 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13863 /* It doesn't actually matter what mode we use here, as we're
13864 only going to use this for printing. */
13865 x = adjust_address_nv (x, DImode, 8);
13873 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13876 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13879 int pred_val = INTVAL (XEXP (x, 0));
13881 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13882 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13884 int taken = pred_val > REG_BR_PROB_BASE / 2;
13885 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13887 /* Emit hints only when the default branch prediction
13888 heuristics would fail. */
13889 if (taken != cputaken)
13891 /* We use 3e (DS) prefix for taken branches and
13892 2e (CS) prefix for not taken branches. */
13894 fputs ("ds ; ", file);
13896 fputs ("cs ; ", file);
13904 switch (GET_CODE (x))
13907 fputs ("neq", file);
13910 fputs ("eq", file);
13914 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13918 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13922 fputs ("le", file);
13926 fputs ("lt", file);
13929 fputs ("unord", file);
13932 fputs ("ord", file);
13935 fputs ("ueq", file);
13938 fputs ("nlt", file);
13941 fputs ("nle", file);
13944 fputs ("ule", file);
13947 fputs ("ult", file);
13950 fputs ("une", file);
13953 output_operand_lossage ("operand is not a condition code, "
13954 "invalid operand code 'Y'");
13960 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13966 if (ASSEMBLER_DIALECT == ASM_ATT)
13969 /* The kernel uses a different segment register for performance
13970 reasons; this way a system call does not have to trash the userspace
13971 segment register, which would be expensive. */
13972 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13973 fputs ("fs", file);
13975 fputs ("gs", file);
13979 output_operand_lossage ("invalid operand code '%c'", code);
13984 print_reg (x, code, file);
13986 else if (MEM_P (x))
13988 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13989 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13990 && GET_MODE (x) != BLKmode)
13993 switch (GET_MODE_SIZE (GET_MODE (x)))
13995 case 1: size = "BYTE"; break;
13996 case 2: size = "WORD"; break;
13997 case 4: size = "DWORD"; break;
13998 case 8: size = "QWORD"; break;
13999 case 12: size = "TBYTE"; break;
14001 if (GET_MODE (x) == XFmode)
14006 case 32: size = "YMMWORD"; break;
14008 gcc_unreachable ();
14011 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14014 else if (code == 'w')
14016 else if (code == 'k')
14019 fputs (size, file);
14020 fputs (" PTR ", file);
14024 /* Avoid (%rip) for call operands. */
14025 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14026 && !CONST_INT_P (x))
14027 output_addr_const (file, x);
14028 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14029 output_operand_lossage ("invalid constraints for operand");
14031 output_address (x);
14034 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14039 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14040 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14042 if (ASSEMBLER_DIALECT == ASM_ATT)
14044 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14046 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14048 fprintf (file, "0x%08x", (unsigned int) l);
14051 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14056 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14057 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14059 if (ASSEMBLER_DIALECT == ASM_ATT)
14061 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14064 /* These float cases don't actually occur as immediate operands. */
14065 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14069 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14070 fputs (dstr, file);
14075 /* We have patterns that allow zero sets of memory, for instance.
14076 In 64-bit mode, we should probably support all 8-byte vectors,
14077 since we can in fact encode that into an immediate. */
14078 if (GET_CODE (x) == CONST_VECTOR)
14080 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14084 if (code != 'P' && code != 'p')
14086 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14088 if (ASSEMBLER_DIALECT == ASM_ATT)
14091 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14092 || GET_CODE (x) == LABEL_REF)
14094 if (ASSEMBLER_DIALECT == ASM_ATT)
14097 fputs ("OFFSET FLAT:", file);
14100 if (CONST_INT_P (x))
14101 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14102 else if (flag_pic || MACHOPIC_INDIRECT)
14103 output_pic_addr_const (file, x, code);
14105 output_addr_const (file, x);
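/* For illustration, a hypothetical insn template using the codes above:
     "mov{b}\t{%b1, %b0|%b0, %b1}"
   Here 'b' forces the QImode register name, so an operand that is
   (reg:SI ax) prints as "%al" in AT&T syntax and "al" in Intel syntax.  */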
14110 ix86_print_operand_punct_valid_p (unsigned char code)
14112 return (code == '@' || code == '*' || code == '+'
14113 || code == '&' || code == ';');
14116 /* Print a memory operand whose address is ADDR. */
14119 ix86_print_operand_address (FILE *file, rtx addr)
14121 struct ix86_address parts;
14122 rtx base, index, disp;
14124 int ok = ix86_decompose_address (addr, &parts);
14128 if (parts.base && GET_CODE (parts.base) == SUBREG)
14130 rtx tmp = SUBREG_REG (parts.base);
14131 parts.base = simplify_subreg (GET_MODE (parts.base),
14132 tmp, GET_MODE (tmp), 0);
14135 if (parts.index && GET_CODE (parts.index) == SUBREG)
14137 rtx tmp = SUBREG_REG (parts.index);
14138 parts.index = simplify_subreg (GET_MODE (parts.index),
14139 tmp, GET_MODE (tmp), 0);
14143 index = parts.index;
14145 scale = parts.scale;
14153 if (ASSEMBLER_DIALECT == ASM_ATT)
14155 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14158 gcc_unreachable ();
14161 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14162 if (TARGET_64BIT && !base && !index)
14166 if (GET_CODE (disp) == CONST
14167 && GET_CODE (XEXP (disp, 0)) == PLUS
14168 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14169 symbol = XEXP (XEXP (disp, 0), 0);
14171 if (GET_CODE (symbol) == LABEL_REF
14172 || (GET_CODE (symbol) == SYMBOL_REF
14173 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14176 if (!base && !index)
14178 /* A displacement-only address requires special attention. */
14180 if (CONST_INT_P (disp))
14182 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14183 fputs ("ds:", file);
14184 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14187 output_pic_addr_const (file, disp, 0);
14189 output_addr_const (file, disp);
14195 /* Print SImode registers for zero-extended addresses to force
14196 addr32 prefix. Otherwise print DImode registers to avoid it. */
14198 code = ((GET_CODE (addr) == ZERO_EXTEND
14199 || GET_CODE (addr) == AND)
14203 if (ASSEMBLER_DIALECT == ASM_ATT)
14208 output_pic_addr_const (file, disp, 0);
14209 else if (GET_CODE (disp) == LABEL_REF)
14210 output_asm_label (disp);
14212 output_addr_const (file, disp);
14217 print_reg (base, code, file);
14221 print_reg (index, code, file);
14223 fprintf (file, ",%d", scale);
14229 rtx offset = NULL_RTX;
14233 /* Pull out the offset of a symbol; print any symbol itself. */
14234 if (GET_CODE (disp) == CONST
14235 && GET_CODE (XEXP (disp, 0)) == PLUS
14236 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14238 offset = XEXP (XEXP (disp, 0), 1);
14239 disp = gen_rtx_CONST (VOIDmode,
14240 XEXP (XEXP (disp, 0), 0));
14244 output_pic_addr_const (file, disp, 0);
14245 else if (GET_CODE (disp) == LABEL_REF)
14246 output_asm_label (disp);
14247 else if (CONST_INT_P (disp))
14250 output_addr_const (file, disp);
14256 print_reg (base, code, file);
14259 if (INTVAL (offset) >= 0)
14261 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14265 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14272 print_reg (index, code, file);
14274 fprintf (file, "*%d", scale);
14281 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14284 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14288 if (GET_CODE (x) != UNSPEC)
14291 op = XVECEXP (x, 0, 0);
14292 switch (XINT (x, 1))
14294 case UNSPEC_GOTTPOFF:
14295 output_addr_const (file, op);
14296 /* FIXME: This might be @TPOFF in Sun ld. */
14297 fputs ("@gottpoff", file);
14300 output_addr_const (file, op);
14301 fputs ("@tpoff", file);
14303 case UNSPEC_NTPOFF:
14304 output_addr_const (file, op);
14306 fputs ("@tpoff", file);
14308 fputs ("@ntpoff", file);
14310 case UNSPEC_DTPOFF:
14311 output_addr_const (file, op);
14312 fputs ("@dtpoff", file);
14314 case UNSPEC_GOTNTPOFF:
14315 output_addr_const (file, op);
14317 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14318 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14320 fputs ("@gotntpoff", file);
14322 case UNSPEC_INDNTPOFF:
14323 output_addr_const (file, op);
14324 fputs ("@indntpoff", file);
14327 case UNSPEC_MACHOPIC_OFFSET:
14328 output_addr_const (file, op);
14330 machopic_output_function_base_name (file);
14334 case UNSPEC_STACK_CHECK:
14338 gcc_assert (flag_split_stack);
14340 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14341 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14343 gcc_unreachable ();
14346 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14357 /* Split one or more double-mode RTL references into pairs of half-mode
14358 references. The RTL can be REG, offsettable MEM, integer constant, or
14359 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14360 split and "num" is its length. lo_half and hi_half are output arrays
14361 that parallel "operands". */
14364 split_double_mode (enum machine_mode mode, rtx operands[],
14365 int num, rtx lo_half[], rtx hi_half[])
14367 enum machine_mode half_mode;
14373 half_mode = DImode;
14376 half_mode = SImode;
14379 gcc_unreachable ();
14382 byte = GET_MODE_SIZE (half_mode);
14386 rtx op = operands[num];
14388 /* simplify_subreg refuses to split volatile memory addresses,
14389 but we still have to handle them. */
14392 lo_half[num] = adjust_address (op, half_mode, 0);
14393 hi_half[num] = adjust_address (op, half_mode, byte);
14397 lo_half[num] = simplify_gen_subreg (half_mode, op,
14398 GET_MODE (op) == VOIDmode
14399 ? mode : GET_MODE (op), 0);
14400 hi_half[num] = simplify_gen_subreg (half_mode, op,
14401 GET_MODE (op) == VOIDmode
14402 ? mode : GET_MODE (op), byte);
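/* For illustration (a host-side analogue, not compiled here), the split
   performed above for a little-endian 64-bit value:  */
#if 0
static void
split_u64 (unsigned long long v, unsigned int *lo, unsigned int *hi)
{
  *lo = (unsigned int) v;          /* like the subreg at byte offset 0 */
  *hi = (unsigned int) (v >> 32);  /* like the subreg at byte offset 4 */
}
#endif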
14407 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14408 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14409 is the expression of the binary operation. The output may either be
14410 emitted here, or returned to the caller, like all output_* functions.
14412 There is no guarantee that the operands are the same mode, as they
14413 might be within FLOAT or FLOAT_EXTEND expressions. */
14415 #ifndef SYSV386_COMPAT
14416 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14417 wants to fix the assemblers because that causes incompatibility
14418 with gcc. No-one wants to fix gcc because that causes
14419 incompatibility with assemblers... You can use the option of
14420 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14421 #define SYSV386_COMPAT 1
14425 output_387_binary_op (rtx insn, rtx *operands)
14427 static char buf[40];
14430 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14432 #ifdef ENABLE_CHECKING
14433 /* Even if we do not want to check the inputs, this documents input
14434 constraints, which helps in understanding the following code. */
14435 if (STACK_REG_P (operands[0])
14436 && ((REG_P (operands[1])
14437 && REGNO (operands[0]) == REGNO (operands[1])
14438 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14439 || (REG_P (operands[2])
14440 && REGNO (operands[0]) == REGNO (operands[2])
14441 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14442 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14445 gcc_assert (is_sse);
14448 switch (GET_CODE (operands[3]))
14451 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14452 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14460 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14461 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14469 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14470 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14478 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14479 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14487 gcc_unreachable ();
14494 strcpy (buf, ssep);
14495 if (GET_MODE (operands[0]) == SFmode)
14496 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14498 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14502 strcpy (buf, ssep + 1);
14503 if (GET_MODE (operands[0]) == SFmode)
14504 strcat (buf, "ss\t{%2, %0|%0, %2}");
14506 strcat (buf, "sd\t{%2, %0|%0, %2}");
14512 switch (GET_CODE (operands[3]))
14516 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14518 rtx temp = operands[2];
14519 operands[2] = operands[1];
14520 operands[1] = temp;
14523 /* Now we know operands[0] == operands[1]. */
14525 if (MEM_P (operands[2]))
14531 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14533 if (STACK_TOP_P (operands[0]))
14534 /* How is it that we are storing to a dead operand[2]?
14535 Well, presumably operands[1] is dead too. We can't
14536 store the result to st(0) as st(0) gets popped on this
14537 instruction. Instead store to operands[2] (which I
14538 think has to be st(1)). st(1) will be popped later.
14539 gcc <= 2.8.1 didn't have this check and generated
14540 assembly code that the Unixware assembler rejected. */
14541 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14543 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14547 if (STACK_TOP_P (operands[0]))
14548 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14550 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14555 if (MEM_P (operands[1]))
14561 if (MEM_P (operands[2]))
14567 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14570 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14571 derived assemblers, confusingly reverse the direction of
14572 the operation for fsub{r} and fdiv{r} when the
14573 destination register is not st(0). The Intel assembler
14574 doesn't have this brain damage. Read !SYSV386_COMPAT to
14575 figure out what the hardware really does. */
14576 if (STACK_TOP_P (operands[0]))
14577 p = "{p\t%0, %2|rp\t%2, %0}";
14579 p = "{rp\t%2, %0|p\t%0, %2}";
14581 if (STACK_TOP_P (operands[0]))
14582 /* As above for fmul/fadd, we can't store to st(0). */
14583 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14585 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14590 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14593 if (STACK_TOP_P (operands[0]))
14594 p = "{rp\t%0, %1|p\t%1, %0}";
14596 p = "{p\t%1, %0|rp\t%0, %1}";
14598 if (STACK_TOP_P (operands[0]))
14599 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14601 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14606 if (STACK_TOP_P (operands[0]))
14608 if (STACK_TOP_P (operands[1]))
14609 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14611 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14614 else if (STACK_TOP_P (operands[1]))
14617 p = "{\t%1, %0|r\t%0, %1}";
14619 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14625 p = "{r\t%2, %0|\t%0, %2}";
14627 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14633 gcc_unreachable ();
14640 /* Return the mode needed for ENTITY in the optimize_mode_switching pass. */
14643 ix86_mode_needed (int entity, rtx insn)
14645 enum attr_i387_cw mode;
14647 /* The mode UNINITIALIZED is used to store the control word after a
14648 function call or ASM pattern. The mode ANY specifies that the function
14649 has no requirements on the control word and makes no changes in the
14650 bits we are interested in. */
14653 || (NONJUMP_INSN_P (insn)
14654 && (asm_noperands (PATTERN (insn)) >= 0
14655 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14656 return I387_CW_UNINITIALIZED;
14658 if (recog_memoized (insn) < 0)
14659 return I387_CW_ANY;
14661 mode = get_attr_i387_cw (insn);
14666 if (mode == I387_CW_TRUNC)
14671 if (mode == I387_CW_FLOOR)
14676 if (mode == I387_CW_CEIL)
14681 if (mode == I387_CW_MASK_PM)
14686 gcc_unreachable ();
14689 return I387_CW_ANY;
14692 /* Output code to initialize control word copies used by trunc?f?i and
14693 rounding patterns. CURRENT_MODE is set to the current control word,
14694 while NEW_MODE is set to the new control word. */
14697 emit_i387_cw_initialization (int mode)
14699 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14702 enum ix86_stack_slot slot;
14704 rtx reg = gen_reg_rtx (HImode);
14706 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14707 emit_move_insn (reg, copy_rtx (stored_mode));
14709 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14710 || optimize_function_for_size_p (cfun))
14714 case I387_CW_TRUNC:
14715 /* round toward zero (truncate) */
14716 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14717 slot = SLOT_CW_TRUNC;
14720 case I387_CW_FLOOR:
14721 /* round down toward -oo */
14722 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14723 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14724 slot = SLOT_CW_FLOOR;
14728 /* round up toward +oo */
14729 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14730 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14731 slot = SLOT_CW_CEIL;
14734 case I387_CW_MASK_PM:
14735 /* mask precision exception for nearbyint() */
14736 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14737 slot = SLOT_CW_MASK_PM;
14741 gcc_unreachable ();
14748 case I387_CW_TRUNC:
14749 /* round toward zero (truncate) */
14750 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14751 slot = SLOT_CW_TRUNC;
14754 case I387_CW_FLOOR:
14755 /* round down toward -oo */
14756 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14757 slot = SLOT_CW_FLOOR;
14761 /* round up toward +oo */
14762 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14763 slot = SLOT_CW_CEIL;
14766 case I387_CW_MASK_PM:
14767 /* mask precision exception for nearbyint() */
14768 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14769 slot = SLOT_CW_MASK_PM;
14773 gcc_unreachable ();
14777 gcc_assert (slot < MAX_386_STACK_LOCALS);
14779 new_mode = assign_386_stack_local (HImode, slot);
14780 emit_move_insn (new_mode, reg);
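/* For illustration: the edits above program the x87 FCW rounding-control
   field, bits 10-11 (00 = to nearest, 01 = down, 10 = up, 11 = toward
   zero), as in this sketch:  */
#if 0
static unsigned short
set_x87_rounding (unsigned short cw, unsigned int rc)
{
  return (unsigned short) ((cw & ~0x0c00) | ((rc & 3) << 10));
}
#endif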
14783 /* Output code for INSN to convert a float to a signed int. OPERANDS
14784 are the insn operands. The output may be [HSD]Imode and the input
14785 operand may be [SDX]Fmode. */
14788 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
14790 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14791 int dimode_p = GET_MODE (operands[0]) == DImode;
14792 int round_mode = get_attr_i387_cw (insn);
14794 /* Jump through a hoop or two for DImode, since the hardware has no
14795 non-popping instruction. We used to do this a different way, but
14796 that was somewhat fragile and broke with post-reload splitters. */
14797 if ((dimode_p || fisttp) && !stack_top_dies)
14798 output_asm_insn ("fld\t%y1", operands);
14800 gcc_assert (STACK_TOP_P (operands[1]));
14801 gcc_assert (MEM_P (operands[0]));
14802 gcc_assert (GET_MODE (operands[1]) != TFmode);
14805 output_asm_insn ("fisttp%Z0\t%0", operands);
14808 if (round_mode != I387_CW_ANY)
14809 output_asm_insn ("fldcw\t%3", operands);
14810 if (stack_top_dies || dimode_p)
14811 output_asm_insn ("fistp%Z0\t%0", operands);
14813 output_asm_insn ("fist%Z0\t%0", operands);
14814 if (round_mode != I387_CW_ANY)
14815 output_asm_insn ("fldcw\t%2", operands);
14821 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14822 have the values zero or one, indicates the ffreep insn's operand
14823 from the OPERANDS array. */
14825 static const char *
14826 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14828 if (TARGET_USE_FFREEP)
14829 #ifdef HAVE_AS_IX86_FFREEP
14830 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14833 static char retval[32];
14834 int regno = REGNO (operands[opno]);
14836 gcc_assert (FP_REGNO_P (regno));
14838 regno -= FIRST_STACK_REG;
14840 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14845 return opno ? "fstp\t%y1" : "fstp\t%y0";
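/* For illustration: with the operand in %st(1) the emulation above emits
   ASM_SHORT "0xc1df", i.e. the bytes 0xdf 0xc1 on a little-endian host,
   which is the encoding of "ffreep %st(1)".  */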
14849 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14850 should be used. UNORDERED_P is true when fucom should be used. */
14853 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
14855 int stack_top_dies;
14856 rtx cmp_op0, cmp_op1;
14857 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14861 cmp_op0 = operands[0];
14862 cmp_op1 = operands[1];
14866 cmp_op0 = operands[1];
14867 cmp_op1 = operands[2];
14872 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14873 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14874 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14875 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14877 if (GET_MODE (operands[0]) == SFmode)
14879 return &ucomiss[TARGET_AVX ? 0 : 1];
14881 return &comiss[TARGET_AVX ? 0 : 1];
14884 return &ucomisd[TARGET_AVX ? 0 : 1];
14886 return &comisd[TARGET_AVX ? 0 : 1];
14889 gcc_assert (STACK_TOP_P (cmp_op0));
14891 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14893 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14895 if (stack_top_dies)
14897 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14898 return output_387_ffreep (operands, 1);
14901 return "ftst\n\tfnstsw\t%0";
14904 if (STACK_REG_P (cmp_op1)
14906 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14907 && REGNO (cmp_op1) != FIRST_STACK_REG)
14909 /* If the top of the 387 stack dies, and the other operand
14910 is also a stack register that dies, then this must be a
14911 `fcompp' float compare. */
14915 /* There is no double popping fcomi variant. Fortunately,
14916 eflags is immune from the fstp's cc clobbering. */
14918 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14920 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14921 return output_387_ffreep (operands, 0);
14926 return "fucompp\n\tfnstsw\t%0";
14928 return "fcompp\n\tfnstsw\t%0";
14933 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14935 static const char * const alt[16] =
14937 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14938 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14939 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14940 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14942 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14943 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14947 "fcomi\t{%y1, %0|%0, %y1}",
14948 "fcomip\t{%y1, %0|%0, %y1}",
14949 "fucomi\t{%y1, %0|%0, %y1}",
14950 "fucomip\t{%y1, %0|%0, %y1}",
14961 mask = eflags_p << 3;
14962 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14963 mask |= unordered_p << 1;
14964 mask |= stack_top_dies;
14966 gcc_assert (mask < 16);
14975 ix86_output_addr_vec_elt (FILE *file, int value)
14977 const char *directive = ASM_LONG;
14981 directive = ASM_QUAD;
14983 gcc_assert (!TARGET_64BIT);
14986 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14990 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14992 const char *directive = ASM_LONG;
14995 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14996 directive = ASM_QUAD;
14998 gcc_assert (!TARGET_64BIT);
15000 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15001 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15002 fprintf (file, "%s%s%d-%s%d\n",
15003 directive, LPREFIX, value, LPREFIX, rel);
15004 else if (HAVE_AS_GOTOFF_IN_DATA)
15005 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15007 else if (TARGET_MACHO)
15009 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15010 machopic_output_function_base_name (file);
15015 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15016 GOT_SYMBOL_NAME, LPREFIX, value);
15019 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15023 ix86_expand_clear (rtx dest)
15027 /* We play register width games, which are only valid after reload. */
15028 gcc_assert (reload_completed);
15030 /* Avoid HImode and its attendant prefix byte. */
15031 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15032 dest = gen_rtx_REG (SImode, REGNO (dest));
15033 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15035 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15036 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15038 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15039 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
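/* For illustration: the xor form assembles to e.g. "xorl %eax, %eax",
   which is shorter than "movl $0, %eax" and breaks dependencies on the
   old register value, at the cost of clobbering the flags -- hence the
   CLOBBER of FLAGS_REG attached above.  */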
15045 /* X is an unchanging MEM. If it is a constant pool reference, return
15046 the constant pool rtx, else NULL. */
15049 maybe_get_pool_constant (rtx x)
15051 x = ix86_delegitimize_address (XEXP (x, 0));
15053 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15054 return get_pool_constant (x);
15060 ix86_expand_move (enum machine_mode mode, rtx operands[])
15063 enum tls_model model;
15068 if (GET_CODE (op1) == SYMBOL_REF)
15070 model = SYMBOL_REF_TLS_MODEL (op1);
15073 op1 = legitimize_tls_address (op1, model, true);
15074 op1 = force_operand (op1, op0);
15077 if (GET_MODE (op1) != mode)
15078 op1 = convert_to_mode (mode, op1, 1);
15080 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15081 && SYMBOL_REF_DLLIMPORT_P (op1))
15082 op1 = legitimize_dllimport_symbol (op1, false);
15084 else if (GET_CODE (op1) == CONST
15085 && GET_CODE (XEXP (op1, 0)) == PLUS
15086 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15088 rtx addend = XEXP (XEXP (op1, 0), 1);
15089 rtx symbol = XEXP (XEXP (op1, 0), 0);
15092 model = SYMBOL_REF_TLS_MODEL (symbol);
15094 tmp = legitimize_tls_address (symbol, model, true);
15095 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15096 && SYMBOL_REF_DLLIMPORT_P (symbol))
15097 tmp = legitimize_dllimport_symbol (symbol, true);
15101 tmp = force_operand (tmp, NULL);
15102 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15103 op0, 1, OPTAB_DIRECT);
15106 if (GET_MODE (tmp) != mode)
15107 op1 = convert_to_mode (mode, tmp, 1);
15111 if ((flag_pic || MACHOPIC_INDIRECT)
15112 && symbolic_operand (op1, mode))
15114 if (TARGET_MACHO && !TARGET_64BIT)
15117 /* dynamic-no-pic */
15118 if (MACHOPIC_INDIRECT)
15120 rtx temp = ((reload_in_progress
15121 || ((op0 && REG_P (op0))
15123 ? op0 : gen_reg_rtx (Pmode));
15124 op1 = machopic_indirect_data_reference (op1, temp);
15126 op1 = machopic_legitimize_pic_address (op1, mode,
15127 temp == op1 ? 0 : temp);
15129 if (op0 != op1 && GET_CODE (op0) != MEM)
15131 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15135 if (GET_CODE (op0) == MEM)
15136 op1 = force_reg (Pmode, op1);
15140 if (GET_CODE (temp) != REG)
15141 temp = gen_reg_rtx (Pmode);
15142 temp = legitimize_pic_address (op1, temp);
15147 /* dynamic-no-pic */
15153 op1 = force_reg (mode, op1);
15154 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15156 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15157 op1 = legitimize_pic_address (op1, reg);
15160 if (GET_MODE (op1) != mode)
15161 op1 = convert_to_mode (mode, op1, 1);
15168 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15169 || !push_operand (op0, mode))
15171 op1 = force_reg (mode, op1);
15173 if (push_operand (op0, mode)
15174 && ! general_no_elim_operand (op1, mode))
15175 op1 = copy_to_mode_reg (mode, op1);
15177 /* Force large constants in 64bit compilation into a register
15178 to get them CSEed. */
15179 if (can_create_pseudo_p ()
15180 && (mode == DImode) && TARGET_64BIT
15181 && immediate_operand (op1, mode)
15182 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15183 && !register_operand (op0, mode)
15185 op1 = copy_to_mode_reg (mode, op1);
15187 if (can_create_pseudo_p ()
15188 && FLOAT_MODE_P (mode)
15189 && GET_CODE (op1) == CONST_DOUBLE)
15191 /* If we are loading a floating point constant to a register,
15192 force the value to memory now, since we'll get better code
15193 out the back end. */
15195 op1 = validize_mem (force_const_mem (mode, op1));
15196 if (!register_operand (op0, mode))
15198 rtx temp = gen_reg_rtx (mode);
15199 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15200 emit_move_insn (op0, temp);
15206 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15210 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15212 rtx op0 = operands[0], op1 = operands[1];
15213 unsigned int align = GET_MODE_ALIGNMENT (mode);
15215 /* Force constants other than zero into memory. We do not know how
15216 the instructions used to build constants modify the upper 64 bits
15217 of the register; once we have that information, we may be able
15218 to handle some of them more efficiently. */
15219 if (can_create_pseudo_p ()
15220 && register_operand (op0, mode)
15221 && (CONSTANT_P (op1)
15222 || (GET_CODE (op1) == SUBREG
15223 && CONSTANT_P (SUBREG_REG (op1))))
15224 && !standard_sse_constant_p (op1))
15225 op1 = validize_mem (force_const_mem (mode, op1));
15227 /* We need to check memory alignment for SSE modes since an attribute
15228 can make operands unaligned. */
15229 if (can_create_pseudo_p ()
15230 && SSE_REG_MODE_P (mode)
15231 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15232 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15236 /* ix86_expand_vector_move_misalign() does not like constants ... */
15237 if (CONSTANT_P (op1)
15238 || (GET_CODE (op1) == SUBREG
15239 && CONSTANT_P (SUBREG_REG (op1))))
15240 op1 = validize_mem (force_const_mem (mode, op1));
15242 /* ... nor both arguments in memory. */
15243 if (!register_operand (op0, mode)
15244 && !register_operand (op1, mode))
15245 op1 = force_reg (mode, op1);
15247 tmp[0] = op0; tmp[1] = op1;
15248 ix86_expand_vector_move_misalign (mode, tmp);
15252 /* Make operand1 a register if it isn't already. */
15253 if (can_create_pseudo_p ()
15254 && !register_operand (op0, mode)
15255 && !register_operand (op1, mode))
15257 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15261 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15264 /* Split 32-byte AVX unaligned load and store if needed. */
15267 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15270 rtx (*extract) (rtx, rtx, rtx);
15271 rtx (*move_unaligned) (rtx, rtx);
15272 enum machine_mode mode;
15274 switch (GET_MODE (op0))
15277 gcc_unreachable ();
15279 extract = gen_avx_vextractf128v32qi;
15280 move_unaligned = gen_avx_movdqu256;
15284 extract = gen_avx_vextractf128v8sf;
15285 move_unaligned = gen_avx_movups256;
15289 extract = gen_avx_vextractf128v4df;
15290 move_unaligned = gen_avx_movupd256;
15295 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15297 rtx r = gen_reg_rtx (mode);
15298 m = adjust_address (op1, mode, 0);
15299 emit_move_insn (r, m);
15300 m = adjust_address (op1, mode, 16);
15301 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15302 emit_move_insn (op0, r);
15304 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15306 m = adjust_address (op0, mode, 0);
15307 emit_insn (extract (m, op1, const0_rtx));
15308 m = adjust_address (op0, mode, 16);
15309 emit_insn (extract (m, op1, const1_rtx));
15312 emit_insn (move_unaligned (op0, op1));
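/* Sketch of the effect (operand roles as above): an unaligned 256-bit
   load is split into two 128-bit loads whose results are glued back
   together with VEC_CONCAT; an unaligned 256-bit store is split into
   two vextractf128-based 128-bit stores; otherwise a single unaligned
   256-bit move (vmovdqu/vmovups/vmovupd) is emitted.  */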
15315 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15316 straight to ix86_expand_vector_move. */
15317 /* Code generation for scalar reg-reg moves of single and double precision data:
15318 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
15322 if (x86_sse_partial_reg_dependency == true)
15327 Code generation for scalar loads of double precision data:
15328 if (x86_sse_split_regs == true)
15329 movlpd mem, reg (gas syntax)
15333 Code generation for unaligned packed loads of single precision data
15334 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15335 if (x86_sse_unaligned_move_optimal)
15338 if (x86_sse_partial_reg_dependency == true)
15350 Code generation for unaligned packed loads of double precision data
15351 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15352 if (x86_sse_unaligned_move_optimal)
15355 if (x86_sse_split_regs == true)
15368 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15377 switch (GET_MODE_CLASS (mode))
15379 case MODE_VECTOR_INT:
15381 switch (GET_MODE_SIZE (mode))
15384 /* If we're optimizing for size, movups is the smallest. */
15385 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15387 op0 = gen_lowpart (V4SFmode, op0);
15388 op1 = gen_lowpart (V4SFmode, op1);
15389 emit_insn (gen_sse_movups (op0, op1));
15392 op0 = gen_lowpart (V16QImode, op0);
15393 op1 = gen_lowpart (V16QImode, op1);
15394 emit_insn (gen_sse2_movdqu (op0, op1));
15397 op0 = gen_lowpart (V32QImode, op0);
15398 op1 = gen_lowpart (V32QImode, op1);
15399 ix86_avx256_split_vector_move_misalign (op0, op1);
15402 gcc_unreachable ();
15405 case MODE_VECTOR_FLOAT:
15406 op0 = gen_lowpart (mode, op0);
15407 op1 = gen_lowpart (mode, op1);
15412 emit_insn (gen_sse_movups (op0, op1));
15415 ix86_avx256_split_vector_move_misalign (op0, op1);
15418 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15420 op0 = gen_lowpart (V4SFmode, op0);
15421 op1 = gen_lowpart (V4SFmode, op1);
15422 emit_insn (gen_sse_movups (op0, op1));
15425 emit_insn (gen_sse2_movupd (op0, op1));
15428 ix86_avx256_split_vector_move_misalign (op0, op1);
15431 gcc_unreachable ();
15436 gcc_unreachable ();
15444 /* If we're optimizing for size, movups is the smallest. */
15445 if (optimize_insn_for_size_p ()
15446 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15448 op0 = gen_lowpart (V4SFmode, op0);
15449 op1 = gen_lowpart (V4SFmode, op1);
15450 emit_insn (gen_sse_movups (op0, op1));
15454 /* ??? If we have typed data, then it would appear that using
15455 movdqu is the only way to get unaligned data loaded with
15456 integer type. */
15457 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15459 op0 = gen_lowpart (V16QImode, op0);
15460 op1 = gen_lowpart (V16QImode, op1);
15461 emit_insn (gen_sse2_movdqu (op0, op1));
15465 if (TARGET_SSE2 && mode == V2DFmode)
15469 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15471 op0 = gen_lowpart (V2DFmode, op0);
15472 op1 = gen_lowpart (V2DFmode, op1);
15473 emit_insn (gen_sse2_movupd (op0, op1));
15477 /* When SSE registers are split into halves, we can avoid
15478 writing to the top half twice. */
15479 if (TARGET_SSE_SPLIT_REGS)
15481 emit_clobber (op0);
15486 /* ??? Not sure about the best option for the Intel chips.
15487 The following would seem to satisfy; the register is
15488 entirely cleared, breaking the dependency chain. We
15489 then store to the upper half, with a dependency depth
15490 of one. A rumor has it that Intel recommends two movsd
15491 followed by an unpacklpd, but this is unconfirmed. And
15492 given that the dependency depth of the unpacklpd would
15493 still be one, I'm not sure why this would be better. */
15494 zero = CONST0_RTX (V2DFmode);
15497 m = adjust_address (op1, DFmode, 0);
15498 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15499 m = adjust_address (op1, DFmode, 8);
15500 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15504 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15506 op0 = gen_lowpart (V4SFmode, op0);
15507 op1 = gen_lowpart (V4SFmode, op1);
15508 emit_insn (gen_sse_movups (op0, op1));
15512 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15513 emit_move_insn (op0, CONST0_RTX (mode));
15515 emit_clobber (op0);
15517 if (mode != V4SFmode)
15518 op0 = gen_lowpart (V4SFmode, op0);
15519 m = adjust_address (op1, V2SFmode, 0);
15520 emit_insn (gen_sse_loadlps (op0, op0, m));
15521 m = adjust_address (op1, V2SFmode, 8);
15522 emit_insn (gen_sse_loadhps (op0, op0, m));
15525 else if (MEM_P (op0))
15527 /* If we're optimizing for size, movups is the smallest. */
15528 if (optimize_insn_for_size_p ()
15529 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15531 op0 = gen_lowpart (V4SFmode, op0);
15532 op1 = gen_lowpart (V4SFmode, op1);
15533 emit_insn (gen_sse_movups (op0, op1));
15537 /* ??? Similar to above, only less clear because of quote
15538 typeless stores unquote. */
15539 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15540 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15542 op0 = gen_lowpart (V16QImode, op0);
15543 op1 = gen_lowpart (V16QImode, op1);
15544 emit_insn (gen_sse2_movdqu (op0, op1));
15548 if (TARGET_SSE2 && mode == V2DFmode)
15550 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15552 op0 = gen_lowpart (V2DFmode, op0);
15553 op1 = gen_lowpart (V2DFmode, op1);
15554 emit_insn (gen_sse2_movupd (op0, op1));
15558 m = adjust_address (op0, DFmode, 0);
15559 emit_insn (gen_sse2_storelpd (m, op1));
15560 m = adjust_address (op0, DFmode, 8);
15561 emit_insn (gen_sse2_storehpd (m, op1));
15566 if (mode != V4SFmode)
15567 op1 = gen_lowpart (V4SFmode, op1);
15569 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15571 op0 = gen_lowpart (V4SFmode, op0);
15572 emit_insn (gen_sse_movups (op0, op1));
15576 m = adjust_address (op0, V2SFmode, 0);
15577 emit_insn (gen_sse_storelps (m, op1));
15578 m = adjust_address (op0, V2SFmode, 8);
15579 emit_insn (gen_sse_storehps (m, op1));
15584 gcc_unreachable ();
15587 /* Expand a push in MODE. This is some mode for which we do not support
15588 proper push instructions, at least from the registers that we expect
15589 the value to live in. */
15592 ix86_expand_push (enum machine_mode mode, rtx x)
15596 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15597 GEN_INT (-GET_MODE_SIZE (mode)),
15598 stack_pointer_rtx, 1, OPTAB_DIRECT);
15599 if (tmp != stack_pointer_rtx)
15600 emit_move_insn (stack_pointer_rtx, tmp);
15602 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15604 /* When we push an operand onto the stack, it has to be aligned at least
15605 at the function argument boundary. However, since we don't have
15606 the argument type, we can't determine the actual argument
15607 boundary. */
15608 emit_move_insn (tmp, x);
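/* For example (illustrative, a 16-byte mode X on x86_32), the
   expansion is roughly:

       subl  $16, %esp
       ...store X to (%esp)...

   i.e. an explicit stack-pointer adjustment followed by an ordinary
   (possibly multi-insn) move, since no suitable push instruction
   exists for this mode.  */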
15611 /* Helper function of ix86_fixup_binary_operands to canonicalize
15612 operand order. Returns true if the operands should be swapped. */
15615 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15618 rtx dst = operands[0];
15619 rtx src1 = operands[1];
15620 rtx src2 = operands[2];
15622 /* If the operation is not commutative, we can't do anything. */
15623 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15626 /* Highest priority is that src1 should match dst. */
15627 if (rtx_equal_p (dst, src1))
15629 if (rtx_equal_p (dst, src2))
15632 /* Next highest priority is that immediate constants come second. */
15633 if (immediate_operand (src2, mode))
15635 if (immediate_operand (src1, mode))
15638 /* Lowest priority is that memory references should come second. */
15648 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15649 destination to use for the operation. If different from the true
15650 destination in operands[0], a copy operation will be required. */
15653 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15656 rtx dst = operands[0];
15657 rtx src1 = operands[1];
15658 rtx src2 = operands[2];
15660 /* Canonicalize operand order. */
15661 if (ix86_swap_binary_operands_p (code, mode, operands))
15665 /* It is invalid to swap operands of different modes. */
15666 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15673 /* Both source operands cannot be in memory. */
15674 if (MEM_P (src1) && MEM_P (src2))
15676 /* Optimization: Only read from memory once. */
15677 if (rtx_equal_p (src1, src2))
15679 src2 = force_reg (mode, src2);
15683 src2 = force_reg (mode, src2);
15686 /* If the destination is memory, and we do not have matching source
15687 operands, do things in registers. */
15688 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15689 dst = gen_reg_rtx (mode);
15691 /* Source 1 cannot be a constant. */
15692 if (CONSTANT_P (src1))
15693 src1 = force_reg (mode, src1);
15695 /* Source 1 cannot be a non-matching memory. */
15696 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15697 src1 = force_reg (mode, src1);
15699 operands[1] = src1;
15700 operands[2] = src2;
15704 /* Similarly, but assume that the destination has already been
15705 set up properly. */
15708 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15709 enum machine_mode mode, rtx operands[])
15711 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15712 gcc_assert (dst == operands[0]);
15715 /* Attempt to expand a binary operator. Make the expansion closer to the
15716 actual machine than just general_operand, which would allow 3 separate
15717 memory references (one output, two input) in a single insn. */
15720 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15723 rtx src1, src2, dst, op, clob;
15725 dst = ix86_fixup_binary_operands (code, mode, operands);
15726 src1 = operands[1];
15727 src2 = operands[2];
15729 /* Emit the instruction. */
15731 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15732 if (reload_in_progress)
15734 /* Reload doesn't know about the flags register, and doesn't know that
15735 it doesn't want to clobber it. We can only do this with PLUS. */
15736 gcc_assert (code == PLUS);
15739 else if (reload_completed
15741 && !rtx_equal_p (dst, src1))
15743 /* This is going to be an LEA; avoid splitting it later. */
15748 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15749 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15752 /* Fix up the destination if needed. */
15753 if (dst != operands[0])
15754 emit_move_insn (operands[0], dst);
15757 /* Return TRUE or FALSE depending on whether the binary operator meets the
15758 appropriate constraints. */
15761 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15764 rtx dst = operands[0];
15765 rtx src1 = operands[1];
15766 rtx src2 = operands[2];
15768 /* Both source operands cannot be in memory. */
15769 if (MEM_P (src1) && MEM_P (src2))
15772 /* Canonicalize operand order for commutative operators. */
15773 if (ix86_swap_binary_operands_p (code, mode, operands))
15780 /* If the destination is memory, we must have a matching source operand. */
15781 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15784 /* Source 1 cannot be a constant. */
15785 if (CONSTANT_P (src1))
15788 /* Source 1 cannot be a non-matching memory. */
15789 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15791 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15792 return (code == AND
15795 || (TARGET_64BIT && mode == DImode))
15796 && CONST_INT_P (src2)
15797 && (INTVAL (src2) == 0xff
15798 || INTVAL (src2) == 0xffff));
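/* The zero-extend case in concrete terms (illustrative operands):
   "a = b & 0xff" with B in memory need not have a matching
   destination, because it can be emitted as "movz{bl} b, %eax"
   rather than as a two-operand AND.  */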
15804 /* Attempt to expand a unary operator. Make the expansion closer to the
15805 actual machine than just general_operand, which would allow 2 separate
15806 memory references (one output, one input) in a single insn. */
15809 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15812 int matching_memory;
15813 rtx src, dst, op, clob;
15818 /* If the destination is memory, and we do not have matching source
15819 operands, do things in registers. */
15820 matching_memory = 0;
15823 if (rtx_equal_p (dst, src))
15824 matching_memory = 1;
15826 dst = gen_reg_rtx (mode);
15829 /* When source operand is memory, destination must match. */
15830 if (MEM_P (src) && !matching_memory)
15831 src = force_reg (mode, src);
15833 /* Emit the instruction. */
15835 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15836 if (reload_in_progress || code == NOT)
15838 /* Reload doesn't know about the flags register, and doesn't know that
15839 it doesn't want to clobber it. */
15840 gcc_assert (code == NOT);
15845 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15846 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15849 /* Fix up the destination if needed. */
15850 if (dst != operands[0])
15851 emit_move_insn (operands[0], dst);
15854 /* Split 32bit/64bit divmod with 8bit unsigned divmod if the dividend
15855 and divisor are both within the range [0-255]. */
15858 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15861 rtx end_label, qimode_label;
15862 rtx insn, div, mod;
15863 rtx scratch, tmp0, tmp1, tmp2;
15864 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15865 rtx (*gen_zero_extend) (rtx, rtx);
15866 rtx (*gen_test_ccno_1) (rtx, rtx);
15871 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15872 gen_test_ccno_1 = gen_testsi_ccno_1;
15873 gen_zero_extend = gen_zero_extendqisi2;
15876 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15877 gen_test_ccno_1 = gen_testdi_ccno_1;
15878 gen_zero_extend = gen_zero_extendqidi2;
15881 gcc_unreachable ();
15884 end_label = gen_label_rtx ();
15885 qimode_label = gen_label_rtx ();
15887 scratch = gen_reg_rtx (mode);
15889 /* Use 8bit unsigned divmod if the dividend and divisor are both
15890 within the range [0-255]. */
15891 emit_move_insn (scratch, operands[2]);
15892 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15893 scratch, 1, OPTAB_DIRECT);
15894 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15895 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15896 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15897 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15898 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15900 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15901 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15902 JUMP_LABEL (insn) = qimode_label;
15904 /* Generate the original signed/unsigned divmod. */
15905 div = gen_divmod4_1 (operands[0], operands[1],
15906 operands[2], operands[3]);
15909 /* Branch to the end. */
15910 emit_jump_insn (gen_jump (end_label));
15913 /* Generate 8bit unsigned divide. */
15914 emit_label (qimode_label);
15915 /* Don't use operands[0] for the result of the 8bit divide since
15916 not all registers support QImode ZERO_EXTRACT. */
15917 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15918 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15919 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15920 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15924 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15925 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15929 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15930 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15933 /* Extract remainder from AH. */
15934 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15935 if (REG_P (operands[1]))
15936 insn = emit_move_insn (operands[1], tmp1);
15939 /* Need a new scratch register since the old one holds the result
15940 of the 8bit divide. */
15941 scratch = gen_reg_rtx (mode);
15942 emit_move_insn (scratch, tmp1);
15943 insn = emit_move_insn (operands[1], scratch);
15945 set_unique_reg_note (insn, REG_EQUAL, mod);
15947 /* Zero extend quotient from AL. */
15948 tmp1 = gen_lowpart (QImode, tmp0);
15949 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15950 set_unique_reg_note (insn, REG_EQUAL, div);
15952 emit_label (end_label);
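/* Rough shape of the emitted code (labels and register names are
   illustrative, not taken from the sources):

       mov     dividend, %scratch
       or      divisor, %scratch
       test    $-0x100, %scratch
       je      .Lqimode
       div     divisor            ; full-width divide
       jmp     .Lend
   .Lqimode:
       divb    divisor_byte       ; AL = quotient, AH = remainder
   .Lend:

   with the AH/AL extraction above sitting on the 8bit path before
   .Lend.  */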
15955 #define LEA_SEARCH_THRESHOLD 12
15957 /* Search backward for non-agu definition of register number REGNO1
15958 or register number REGNO2 in INSN's basic block until
15959 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15960 2. Reach BB boundary, or
15961 3. Reach agu definition.
15962 Returns the distance between the non-agu definition point and INSN.
15963 If no definition point, returns -1. */
15966 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15969 basic_block bb = BLOCK_FOR_INSN (insn);
15972 enum attr_type insn_type;
15974 if (insn != BB_HEAD (bb))
15976 rtx prev = PREV_INSN (insn);
15977 while (prev && distance < LEA_SEARCH_THRESHOLD)
15979 if (NONDEBUG_INSN_P (prev))
15982 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15983 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15984 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15985 && (regno1 == DF_REF_REGNO (*def_rec)
15986 || regno2 == DF_REF_REGNO (*def_rec)))
15988 insn_type = get_attr_type (prev);
15989 if (insn_type != TYPE_LEA)
15993 if (prev == BB_HEAD (bb))
15995 prev = PREV_INSN (prev);
15999 if (distance < LEA_SEARCH_THRESHOLD)
16003 bool simple_loop = false;
16005 FOR_EACH_EDGE (e, ei, bb->preds)
16008 simple_loop = true;
16014 rtx prev = BB_END (bb);
16017 && distance < LEA_SEARCH_THRESHOLD)
16019 if (NONDEBUG_INSN_P (prev))
16022 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16023 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16024 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16025 && (regno1 == DF_REF_REGNO (*def_rec)
16026 || regno2 == DF_REF_REGNO (*def_rec)))
16028 insn_type = get_attr_type (prev);
16029 if (insn_type != TYPE_LEA)
16033 prev = PREV_INSN (prev);
16041 /* get_attr_type may modify recog data. We want to make sure
16042 that recog data is valid for instruction INSN, on which
16043 distance_non_agu_define is called. INSN is unchanged here. */
16044 extract_insn_cached (insn);
16048 /* Return the distance between INSN and the next insn that uses
16049 register number REGNO0 in a memory address. Return -1 if no
16050 such use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16053 distance_agu_use (unsigned int regno0, rtx insn)
16055 basic_block bb = BLOCK_FOR_INSN (insn);
16060 if (insn != BB_END (bb))
16062 rtx next = NEXT_INSN (insn);
16063 while (next && distance < LEA_SEARCH_THRESHOLD)
16065 if (NONDEBUG_INSN_P (next))
16069 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16070 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16071 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16072 && regno0 == DF_REF_REGNO (*use_rec))
16074 /* Return DISTANCE if OP0 is used in a memory
16075 address in NEXT. */
16079 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16080 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16081 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16082 && regno0 == DF_REF_REGNO (*def_rec))
16084 /* Return -1 if OP0 is set in NEXT. */
16088 if (next == BB_END (bb))
16090 next = NEXT_INSN (next);
16094 if (distance < LEA_SEARCH_THRESHOLD)
16098 bool simple_loop = false;
16100 FOR_EACH_EDGE (e, ei, bb->succs)
16103 simple_loop = true;
16109 rtx next = BB_HEAD (bb);
16112 && distance < LEA_SEARCH_THRESHOLD)
16114 if (NONDEBUG_INSN_P (next))
16118 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16119 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16120 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16121 && regno0 == DF_REF_REGNO (*use_rec))
16123 /* Return DISTANCE if OP0 is used in a memory
16124 address in NEXT. */
16128 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16129 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16130 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16131 && regno0 == DF_REF_REGNO (*def_rec))
16133 /* Return -1 if OP0 is set in NEXT. */
16138 next = NEXT_INSN (next);
16146 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
16147 there is a choice to be made between emitting LEA or ADD.
16148 Negative value: ADD is preferred over LEA
16149 Zero (or undefined): balanced
16150 Positive value: LEA is preferred over ADD */
16151 #define IX86_LEA_PRIORITY 2
16153 /* Return true if it is ok to optimize an ADD operation to an LEA
16154 operation to avoid flag register consumption. For most processors,
16155 ADD is faster than LEA. For processors like ATOM, if the
16156 destination register of the LEA holds an actual address which will
16157 be used soon, LEA is better; otherwise ADD is better. */
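/* Illustrative scenario: on ATOM, LEA executes on the AGU.  If the
   inputs were just produced by non-AGU instructions (small backward
   distance) while the resulting address is not consumed for a while
   (large forward distance), the AGU stall outweighs the flag savings
   and ADD is the better choice; when the result feeds a memory
   address soon, LEA wins.  IX86_LEA_PRIORITY above biases this
   comparison in LEA's favor.  */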
16160 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16162 unsigned int regno0 = true_regnum (operands[0]);
16163 unsigned int regno1 = true_regnum (operands[1]);
16164 unsigned int regno2 = true_regnum (operands[2]);
16166 /* If a = b + c (a != b && a != c), we must use the lea form. */
16167 if (regno0 != regno1 && regno0 != regno2)
16170 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16174 int dist_define, dist_use;
16176 /* Return false if REGNO0 isn't used in a memory address. */
16177 dist_use = distance_agu_use (regno0, insn);
16181 dist_define = distance_non_agu_define (regno1, regno2, insn);
16182 if (dist_define <= 0)
16185 /* If this insn has both a backward non-agu dependence and a forward
16186 agu dependence, the one with the shorter distance takes effect. */
16187 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
16194 /* Return true if the destination reg of SET_BODY is the shift count
16195 of USE_BODY. */
16198 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16204 /* Retrieve destination of SET_BODY. */
16205 switch (GET_CODE (set_body))
16208 set_dest = SET_DEST (set_body);
16209 if (!set_dest || !REG_P (set_dest))
16213 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16214 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16222 /* Retrieve shift count of USE_BODY. */
16223 switch (GET_CODE (use_body))
16226 shift_rtx = XEXP (use_body, 1);
16229 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16230 if (ix86_dep_by_shift_count_body (set_body,
16231 XVECEXP (use_body, 0, i)))
16239 && (GET_CODE (shift_rtx) == ASHIFT
16240 || GET_CODE (shift_rtx) == LSHIFTRT
16241 || GET_CODE (shift_rtx) == ASHIFTRT
16242 || GET_CODE (shift_rtx) == ROTATE
16243 || GET_CODE (shift_rtx) == ROTATERT))
16245 rtx shift_count = XEXP (shift_rtx, 1);
16247 /* Return true if shift count is dest of SET_BODY. */
16248 if (REG_P (shift_count)
16249 && true_regnum (set_dest) == true_regnum (shift_count))
16256 /* Return true if the destination reg of SET_INSN is the shift count
16257 of USE_INSN. */
16260 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16262 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16263 PATTERN (use_insn));
16266 /* Return TRUE or FALSE depending on whether the unary operator meets the
16267 appropriate constraints. */
16270 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16271 enum machine_mode mode ATTRIBUTE_UNUSED,
16272 rtx operands[2] ATTRIBUTE_UNUSED)
16274 /* If one of the operands is a memory operand, source and destination must match. */
16275 if ((MEM_P (operands[0])
16276 || MEM_P (operands[1]))
16277 && ! rtx_equal_p (operands[0], operands[1]))
16282 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16283 are ok, keeping in mind the possible movddup alternative. */
16286 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16288 if (MEM_P (operands[0]))
16289 return rtx_equal_p (operands[0], operands[1 + high]);
16290 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16291 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16295 /* Post-reload splitter for converting an SF or DFmode value in an
16296 SSE register into an unsigned SImode. */
16299 ix86_split_convert_uns_si_sse (rtx operands[])
16301 enum machine_mode vecmode;
16302 rtx value, large, zero_or_two31, input, two31, x;
16304 large = operands[1];
16305 zero_or_two31 = operands[2];
16306 input = operands[3];
16307 two31 = operands[4];
16308 vecmode = GET_MODE (large);
16309 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16311 /* Load up the value into the low element. We must ensure that the other
16312 elements are valid floats -- zero is the easiest such value. */
16315 if (vecmode == V4SFmode)
16316 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16318 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16322 input = gen_rtx_REG (vecmode, REGNO (input));
16323 emit_move_insn (value, CONST0_RTX (vecmode));
16324 if (vecmode == V4SFmode)
16325 emit_insn (gen_sse_movss (value, value, input));
16327 emit_insn (gen_sse2_movsd (value, value, input));
16330 emit_move_insn (large, two31);
16331 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16333 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16334 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16336 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16337 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16339 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16340 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16342 large = gen_rtx_REG (V4SImode, REGNO (large));
16343 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16345 x = gen_rtx_REG (V4SImode, REGNO (value));
16346 if (vecmode == V4SFmode)
16347 emit_insn (gen_sse2_cvttps2dq (x, value));
16349 emit_insn (gen_sse2_cvttpd2dq (x, value));
16352 emit_insn (gen_xorv4si3 (value, value, large));
16355 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16356 Expects the 64-bit DImode to be supplied in a pair of integral
16357 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16358 -mfpmath=sse, !optimize_size only. */
16361 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16363 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16364 rtx int_xmm, fp_xmm;
16365 rtx biases, exponents;
16368 int_xmm = gen_reg_rtx (V4SImode);
16369 if (TARGET_INTER_UNIT_MOVES)
16370 emit_insn (gen_movdi_to_sse (int_xmm, input));
16371 else if (TARGET_SSE_SPLIT_REGS)
16373 emit_clobber (int_xmm);
16374 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16378 x = gen_reg_rtx (V2DImode);
16379 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16380 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16383 x = gen_rtx_CONST_VECTOR (V4SImode,
16384 gen_rtvec (4, GEN_INT (0x43300000UL),
16385 GEN_INT (0x45300000UL),
16386 const0_rtx, const0_rtx));
16387 exponents = validize_mem (force_const_mem (V4SImode, x));
16389 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16390 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16392 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16393 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16394 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16395 (0x1.0p84 + double(fp_value_hi_xmm)).
16396 Note these exponents differ by 32. */
16398 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16400 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16401 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16402 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16403 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16404 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16405 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16406 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16407 biases = validize_mem (force_const_mem (V2DFmode, biases));
16408 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16410 /* Add the upper and lower DFmode values together. */
16412 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16415 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16416 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16417 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16420 ix86_expand_vector_extract (false, target, fp_xmm, 0);
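/* Worked example of the exponent trick (illustrative input): for
   0x0000000100000002 (hi = 1, lo = 2) the interleave produces the
   doubles 0x1.0p52 + 2 and 0x1.0p84 + 1 * 0x1.0p32; subtracting the
   biases leaves 2.0 and 4294967296.0, and the final add yields
   4294967298.0, i.e. (double) 0x100000002.  */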
16423 /* Not used, but eases macroization of patterns. */
16425 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16426 rtx input ATTRIBUTE_UNUSED)
16428 gcc_unreachable ();
16431 /* Convert an unsigned SImode value into a DFmode. Only currently used
16432 for SSE, but applicable anywhere. */
16435 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16437 REAL_VALUE_TYPE TWO31r;
16440 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16441 NULL, 1, OPTAB_DIRECT);
16443 fp = gen_reg_rtx (DFmode);
16444 emit_insn (gen_floatsidf2 (fp, x));
16446 real_ldexp (&TWO31r, &dconst1, 31);
16447 x = const_double_from_real_value (TWO31r, DFmode);
16449 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16451 emit_move_insn (target, x);
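/* Worked example (illustrative): for input 0x80000003 (2147483651),
   adding INT_MIN wraps to the signed value 3, floatsidf gives 3.0,
   and adding 0x1.0p31 back produces 2147483651.0, the correct
   unsigned result.  */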
16454 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16455 32-bit mode; otherwise we have a direct convert instruction. */
16458 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16460 REAL_VALUE_TYPE TWO32r;
16461 rtx fp_lo, fp_hi, x;
16463 fp_lo = gen_reg_rtx (DFmode);
16464 fp_hi = gen_reg_rtx (DFmode);
16466 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16468 real_ldexp (&TWO32r, &dconst1, 32);
16469 x = const_double_from_real_value (TWO32r, DFmode);
16470 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16472 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16474 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16477 emit_move_insn (target, x);
16480 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16481 For x86_32, -mfpmath=sse, !optimize_size only. */
16483 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16485 REAL_VALUE_TYPE ONE16r;
16486 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16488 real_ldexp (&ONE16r, &dconst1, 16);
16489 x = const_double_from_real_value (ONE16r, SFmode);
16490 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16491 NULL, 0, OPTAB_DIRECT);
16492 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16493 NULL, 0, OPTAB_DIRECT);
16494 fp_hi = gen_reg_rtx (SFmode);
16495 fp_lo = gen_reg_rtx (SFmode);
16496 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16497 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16498 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16500 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16502 if (!rtx_equal_p (target, fp_hi))
16503 emit_move_insn (target, fp_hi);
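/* Sketch of the idea (illustrative input): for 0xdeadbeef we get
   int_hi = 0xdead and int_lo = 0xbeef; both convert to SFmode
   exactly, the multiply by 0x1.0p16 is exact, and the single
   rounding happens at the final add -- avoiding any signed-overflow
   trouble with a direct 32-bit convert.  */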
16506 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16507 then replicate the value for all elements of the vector
16508 register. */
16511 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16518 v = gen_rtvec (4, value, value, value, value);
16519 return gen_rtx_CONST_VECTOR (V4SImode, v);
16523 v = gen_rtvec (2, value, value);
16524 return gen_rtx_CONST_VECTOR (V2DImode, v);
16528 v = gen_rtvec (8, value, value, value, value,
16529 value, value, value, value);
16531 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16532 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16533 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16534 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16535 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16539 v = gen_rtvec (4, value, value, value, value);
16541 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16542 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16543 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16547 v = gen_rtvec (4, value, value, value, value);
16549 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16550 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16551 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16555 v = gen_rtvec (2, value, value);
16557 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16558 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16561 gcc_unreachable ();
16565 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16566 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16567 for an SSE register. If VECT is true, then replicate the mask for
16568 all elements of the vector register. If INVERT is true, then create
16569 a mask excluding the sign bit. */
16572 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16574 enum machine_mode vec_mode, imode;
16575 HOST_WIDE_INT hi, lo;
16580 /* Find the sign bit, sign extended to 2*HWI. */
16587 mode = GET_MODE_INNER (mode);
16589 lo = 0x80000000, hi = lo < 0;
16596 mode = GET_MODE_INNER (mode);
16598 if (HOST_BITS_PER_WIDE_INT >= 64)
16599 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16601 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16606 vec_mode = VOIDmode;
16607 if (HOST_BITS_PER_WIDE_INT >= 64)
16610 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16617 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16621 lo = ~lo, hi = ~hi;
16627 mask = immed_double_const (lo, hi, imode);
16629 vec = gen_rtvec (2, v, mask);
16630 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16631 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16638 gcc_unreachable ();
16642 lo = ~lo, hi = ~hi;
16644 /* Force this value into the low part of a fp vector constant. */
16645 mask = immed_double_const (lo, hi, imode);
16646 mask = gen_lowpart (mode, mask);
16648 if (vec_mode == VOIDmode)
16649 return force_reg (mode, mask);
16651 v = ix86_build_const_vector (vec_mode, vect, mask);
16652 return force_reg (vec_mode, v);
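/* Illustrative values: for DFmode the resulting mask has the bit
   pattern 0x8000000000000000 per element (0x7fffffffffffffff when
   INVERT), and for SFmode 0x80000000 per element, replicated across
   the vector when VECT is set.  */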
16655 /* Generate code for floating point ABS or NEG. */
16658 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16661 rtx mask, set, dst, src;
16662 bool use_sse = false;
16663 bool vector_mode = VECTOR_MODE_P (mode);
16664 enum machine_mode vmode = mode;
16668 else if (mode == TFmode)
16670 else if (TARGET_SSE_MATH)
16672 use_sse = SSE_FLOAT_MODE_P (mode);
16673 if (mode == SFmode)
16675 else if (mode == DFmode)
16679 /* NEG and ABS performed with SSE use bitwise mask operations.
16680 Create the appropriate mask now. */
16682 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16689 set = gen_rtx_fmt_e (code, mode, src);
16690 set = gen_rtx_SET (VOIDmode, dst, set);
16697 use = gen_rtx_USE (VOIDmode, mask);
16699 par = gen_rtvec (2, set, use);
16702 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16703 par = gen_rtvec (3, set, use, clob);
16705 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16711 /* Expand a copysign operation. Special case operand 0 being a constant. */
16714 ix86_expand_copysign (rtx operands[])
16716 enum machine_mode mode, vmode;
16717 rtx dest, op0, op1, mask, nmask;
16719 dest = operands[0];
16723 mode = GET_MODE (dest);
16725 if (mode == SFmode)
16727 else if (mode == DFmode)
16732 if (GET_CODE (op0) == CONST_DOUBLE)
16734 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16736 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16737 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16739 if (mode == SFmode || mode == DFmode)
16741 if (op0 == CONST0_RTX (mode))
16742 op0 = CONST0_RTX (vmode);
16745 rtx v = ix86_build_const_vector (vmode, false, op0);
16747 op0 = force_reg (vmode, v);
16750 else if (op0 != CONST0_RTX (mode))
16751 op0 = force_reg (mode, op0);
16753 mask = ix86_build_signbit_mask (vmode, 0, 0);
16755 if (mode == SFmode)
16756 copysign_insn = gen_copysignsf3_const;
16757 else if (mode == DFmode)
16758 copysign_insn = gen_copysigndf3_const;
16760 copysign_insn = gen_copysigntf3_const;
16762 emit_insn (copysign_insn (dest, op0, op1, mask));
16766 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16768 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16769 mask = ix86_build_signbit_mask (vmode, 0, 0);
16771 if (mode == SFmode)
16772 copysign_insn = gen_copysignsf3_var;
16773 else if (mode == DFmode)
16774 copysign_insn = gen_copysigndf3_var;
16776 copysign_insn = gen_copysigntf3_var;
16778 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
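/* The underlying identity, as a sketch: with SGN the sign-bit mask
   from above, copysign (x, y) = (x & ~SGN) | (y & SGN).  When |x| is
   a compile-time constant its sign bit is already clear, so the
   _const variants need only the "y & SGN" mask and can OR |x| in
   directly.  */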
16782 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16783 be a constant, and so has already been expanded into a vector constant. */
16786 ix86_split_copysign_const (rtx operands[])
16788 enum machine_mode mode, vmode;
16789 rtx dest, op0, mask, x;
16791 dest = operands[0];
16793 mask = operands[3];
16795 mode = GET_MODE (dest);
16796 vmode = GET_MODE (mask);
16798 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16799 x = gen_rtx_AND (vmode, dest, mask);
16800 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16802 if (op0 != CONST0_RTX (vmode))
16804 x = gen_rtx_IOR (vmode, dest, op0);
16805 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16809 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16810 so we have to do two masks. */
16813 ix86_split_copysign_var (rtx operands[])
16815 enum machine_mode mode, vmode;
16816 rtx dest, scratch, op0, op1, mask, nmask, x;
16818 dest = operands[0];
16819 scratch = operands[1];
16822 nmask = operands[4];
16823 mask = operands[5];
16825 mode = GET_MODE (dest);
16826 vmode = GET_MODE (mask);
16828 if (rtx_equal_p (op0, op1))
16830 /* Shouldn't happen often (it's useless, obviously), but when it does
16831 we'd generate incorrect code if we continue below. */
16832 emit_move_insn (dest, op0);
16836 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16838 gcc_assert (REGNO (op1) == REGNO (scratch));
16840 x = gen_rtx_AND (vmode, scratch, mask);
16841 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16844 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16845 x = gen_rtx_NOT (vmode, dest);
16846 x = gen_rtx_AND (vmode, x, op0);
16847 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16851 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16853 x = gen_rtx_AND (vmode, scratch, mask);
16855 else /* alternative 2,4 */
16857 gcc_assert (REGNO (mask) == REGNO (scratch));
16858 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16859 x = gen_rtx_AND (vmode, scratch, op1);
16861 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16863 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16865 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16866 x = gen_rtx_AND (vmode, dest, nmask);
16868 else /* alternative 3,4 */
16870 gcc_assert (REGNO (nmask) == REGNO (dest));
16872 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16873 x = gen_rtx_AND (vmode, dest, op0);
16875 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16878 x = gen_rtx_IOR (vmode, dest, scratch);
16879 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16882 /* Return TRUE or FALSE depending on whether the first SET in INSN
16883 has source and destination with matching CC modes, and that the
16884 CC mode is at least as constrained as REQ_MODE. */
16887 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16890 enum machine_mode set_mode;
16892 set = PATTERN (insn);
16893 if (GET_CODE (set) == PARALLEL)
16894 set = XVECEXP (set, 0, 0);
16895 gcc_assert (GET_CODE (set) == SET);
16896 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16898 set_mode = GET_MODE (SET_DEST (set));
16902 if (req_mode != CCNOmode
16903 && (req_mode != CCmode
16904 || XEXP (SET_SRC (set), 1) != const0_rtx))
16908 if (req_mode == CCGCmode)
16912 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16916 if (req_mode == CCZmode)
16926 if (set_mode != req_mode)
16931 gcc_unreachable ();
16934 return GET_MODE (SET_SRC (set)) == set_mode;
16937 /* Generate insn patterns to do an integer compare of OPERANDS. */
16940 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16942 enum machine_mode cmpmode;
16945 cmpmode = SELECT_CC_MODE (code, op0, op1);
16946 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16948 /* This is very simple, but making the interface the same as in the
16949 FP case makes the rest of the code easier. */
16950 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16951 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16953 /* Return the test that should be put into the flags user, i.e.
16954 the bcc, scc, or cmov instruction. */
16955 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16958 /* Figure out whether to use ordered or unordered fp comparisons.
16959 Return the appropriate mode to use. */
16962 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16964 /* ??? In order to make all comparisons reversible, we do all comparisons
16965 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16966 between trapping and nontrapping comparisons, we can make inequality
16967 comparisons trapping again, since it results in better code when using
16968 FCOM based compares. */
16969 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16973 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16975 enum machine_mode mode = GET_MODE (op0);
16977 if (SCALAR_FLOAT_MODE_P (mode))
16979 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16980 return ix86_fp_compare_mode (code);
16985 /* Only zero flag is needed. */
16986 case EQ: /* ZF=0 */
16987 case NE: /* ZF!=0 */
16989 /* Codes needing carry flag. */
16990 case GEU: /* CF=0 */
16991 case LTU: /* CF=1 */
16992 /* Detect overflow checks. They need just the carry flag. */
16993 if (GET_CODE (op0) == PLUS
16994 && rtx_equal_p (op1, XEXP (op0, 0)))
16998 case GTU: /* CF=0 & ZF=0 */
16999 case LEU: /* CF=1 | ZF=1 */
17000 /* Detect overflow checks. They need just the carry flag. */
17001 if (GET_CODE (op0) == MINUS
17002 && rtx_equal_p (op1, XEXP (op0, 0)))
17006 /* Codes possibly doable only with sign flag when
17007 comparing against zero. */
17008 case GE: /* SF=OF or SF=0 */
17009 case LT: /* SF<>OF or SF=1 */
17010 if (op1 == const0_rtx)
17013 /* For other cases Carry flag is not required. */
17015 /* Codes doable only with the sign flag when comparing
17016 against zero, but for which we lack a jump instruction,
17017 so we need to use relational tests against the overflow
17018 flag, which thus needs to be zero. */
17019 case GT: /* ZF=0 & SF=OF */
17020 case LE: /* ZF=1 | SF<>OF */
17021 if (op1 == const0_rtx)
17025 /* The strcmp pattern does a (use flags), and combine may ask us
17026 for the proper mode. */
17030 gcc_unreachable ();
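/* Rough summary of the mapping above (sketch): EQ/NE need only ZF
   and can use CCZmode; the unsigned comparisons need CF and get
   CCmode; GE/LT against zero can be decided from SF alone
   (CCGOCmode); signed comparisons against a nonzero value need the
   full SF/OF relation (CCGCmode).  */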
17034 /* Return the fixed registers used for condition codes. */
17037 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17044 /* If two condition code modes are compatible, return a condition code
17045 mode which is compatible with both. Otherwise, return
17048 static enum machine_mode
17049 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17054 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17057 if ((m1 == CCGCmode && m2 == CCGOCmode)
17058 || (m1 == CCGOCmode && m2 == CCGCmode))
17064 gcc_unreachable ();
17094 /* These are only compatible with themselves, which we already
17095 know. */
17101 /* Return a comparison we can do and that it is equivalent to
17102 swap_condition (code) apart possibly from orderedness.
17103 But, never change orderedness if TARGET_IEEE_FP, returning
17104 UNKNOWN in that case if necessary. */
17106 static enum rtx_code
17107 ix86_fp_swap_condition (enum rtx_code code)
17111 case GT: /* GTU - CF=0 & ZF=0 */
17112 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17113 case GE: /* GEU - CF=0 */
17114 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17115 case UNLT: /* LTU - CF=1 */
17116 return TARGET_IEEE_FP ? UNKNOWN : GT;
17117 case UNLE: /* LEU - CF=1 | ZF=1 */
17118 return TARGET_IEEE_FP ? UNKNOWN : GE;
17120 return swap_condition (code);
17124 /* Return cost of comparison CODE using the best strategy for performance.
17125 All following functions use the number of instructions as the cost metric.
17126 In the future this should be tweaked to compute bytes for optimize_size and
17127 take into account the performance of various instructions on various CPUs. */
17130 ix86_fp_comparison_cost (enum rtx_code code)
17134 /* The cost of code using bit-twiddling on %ah. */
17151 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17155 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17158 gcc_unreachable ();
17161 switch (ix86_fp_comparison_strategy (code))
17163 case IX86_FPCMP_COMI:
17164 return arith_cost > 4 ? 3 : 2;
17165 case IX86_FPCMP_SAHF:
17166 return arith_cost > 4 ? 4 : 3;
17172 /* Return strategy to use for floating-point. We assume that fcomi is always
17173 preferable where available, since that is also true when looking at size
17174 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17176 enum ix86_fpcmp_strategy
17177 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17179 /* Do fcomi/sahf based test when profitable. */
17182 return IX86_FPCMP_COMI;
17184 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17185 return IX86_FPCMP_SAHF;
17187 return IX86_FPCMP_ARITH;
17190 /* Swap, force into registers, or otherwise massage the two operands
17191 to a fp comparison. The operands are updated in place; the new
17192 comparison code is returned. */
17194 static enum rtx_code
17195 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17197 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17198 rtx op0 = *pop0, op1 = *pop1;
17199 enum machine_mode op_mode = GET_MODE (op0);
17200 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17202 /* All of the unordered compare instructions only work on registers.
17203 The same is true of the fcomi compare instructions. The XFmode
17204 compare instructions require registers except when comparing
17205 against zero or when converting operand 1 from fixed point to
17209 && (fpcmp_mode == CCFPUmode
17210 || (op_mode == XFmode
17211 && ! (standard_80387_constant_p (op0) == 1
17212 || standard_80387_constant_p (op1) == 1)
17213 && GET_CODE (op1) != FLOAT)
17214 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17216 op0 = force_reg (op_mode, op0);
17217 op1 = force_reg (op_mode, op1);
17221 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17222 things around if they appear profitable, otherwise force op0
17223 into a register. */
17225 if (standard_80387_constant_p (op0) == 0
17227 && ! (standard_80387_constant_p (op1) == 0
17230 enum rtx_code new_code = ix86_fp_swap_condition (code);
17231 if (new_code != UNKNOWN)
17234 tmp = op0, op0 = op1, op1 = tmp;
17240 op0 = force_reg (op_mode, op0);
17242 if (CONSTANT_P (op1))
17244 int tmp = standard_80387_constant_p (op1);
17246 op1 = validize_mem (force_const_mem (op_mode, op1));
17250 op1 = force_reg (op_mode, op1);
17253 op1 = force_reg (op_mode, op1);
17257 /* Try to rearrange the comparison to make it cheaper. */
17258 if (ix86_fp_comparison_cost (code)
17259 > ix86_fp_comparison_cost (swap_condition (code))
17260 && (REG_P (op1) || can_create_pseudo_p ()))
17263 tmp = op0, op0 = op1, op1 = tmp;
17264 code = swap_condition (code);
17266 op0 = force_reg (op_mode, op0);
17274 /* Convert comparison codes we use to represent FP comparison to integer
17275 code that will result in proper branch. Return UNKNOWN if no such code
17279 ix86_fp_compare_code_to_integer (enum rtx_code code)
17308 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17311 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17313 enum machine_mode fpcmp_mode, intcmp_mode;
17316 fpcmp_mode = ix86_fp_compare_mode (code);
17317 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17319 /* Do fcomi/sahf based test when profitable. */
17320 switch (ix86_fp_comparison_strategy (code))
17322 case IX86_FPCMP_COMI:
17323 intcmp_mode = fpcmp_mode;
17324 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17325 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17330 case IX86_FPCMP_SAHF:
17331 intcmp_mode = fpcmp_mode;
17332 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17333 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17337 scratch = gen_reg_rtx (HImode);
17338 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17339 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17342 case IX86_FPCMP_ARITH:
17343 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17344 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17345 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17347 scratch = gen_reg_rtx (HImode);
17348 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17350 /* In the unordered case, we have to check C2 for NaN's, which
17351 doesn't happen to work out to anything nice combination-wise.
17352 So do some bit twiddling on the value we've got in AH to come
17353 up with an appropriate set of condition codes. */
17355 intcmp_mode = CCNOmode;
17360 if (code == GT || !TARGET_IEEE_FP)
17362 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17367 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17368 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17369 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17370 intcmp_mode = CCmode;
17376 if (code == LT && TARGET_IEEE_FP)
17378 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17379 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17380 intcmp_mode = CCmode;
17385 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17391 if (code == GE || !TARGET_IEEE_FP)
17393 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17398 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17399 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17405 if (code == LE && TARGET_IEEE_FP)
17407 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17408 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17409 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17410 intcmp_mode = CCmode;
17415 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17421 if (code == EQ && TARGET_IEEE_FP)
17423 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17424 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17425 intcmp_mode = CCmode;
17430 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17436 if (code == NE && TARGET_IEEE_FP)
17438 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17439 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17445 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17451 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17455 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17460 gcc_unreachable ();
17468 /* Return the test that should be put into the flags user, i.e.
17469 the bcc, scc, or cmov instruction. */
17470 return gen_rtx_fmt_ee (code, VOIDmode,
17471 gen_rtx_REG (intcmp_mode, FLAGS_REG),
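/* Background for the 0x45/0x44/0x40/0x01 masks above (sketch): after
   "fnstsw %ax" the 387 condition bits land in %ah as C0 = 0x01,
   C2 = 0x04 and C3 = 0x40, and an unordered result sets all three.
   0x45 therefore tests C0|C2|C3 at once, which is why the IEEE paths
   mask with 0x45 first and then compare against 0x40, 0x44 or 0x01
   to separate NaN outcomes from genuine orderings.  */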
17476 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17480 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17481 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17483 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17485 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17486 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17489 ret = ix86_expand_int_compare (code, op0, op1);
17495 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17497 enum machine_mode mode = GET_MODE (op0);
17509 tmp = ix86_expand_compare (code, op0, op1);
17510 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17511 gen_rtx_LABEL_REF (VOIDmode, label),
17513 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17520 /* Expand DImode branch into multiple compare+branch. */
17522 rtx lo[2], hi[2], label2;
17523 enum rtx_code code1, code2, code3;
17524 enum machine_mode submode;
17526 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17528 tmp = op0, op0 = op1, op1 = tmp;
17529 code = swap_condition (code);
17532 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17533 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17535 submode = mode == DImode ? SImode : DImode;
17537 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17538 avoid two branches. This costs one extra insn, so disable when
17539 optimizing for size. */
17541 if ((code == EQ || code == NE)
17542 && (!optimize_insn_for_size_p ()
17543 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17548 if (hi[1] != const0_rtx)
17549 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17550 NULL_RTX, 0, OPTAB_WIDEN);
17553 if (lo[1] != const0_rtx)
17554 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17555 NULL_RTX, 0, OPTAB_WIDEN);
17557 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17558 NULL_RTX, 0, OPTAB_WIDEN);
17560 ix86_expand_branch (code, tmp, const0_rtx, label);
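
/* Illustrative sketch (not part of the compiler): the (hi0^hi1)|(lo0^lo1)
   equality trick used above, written as plain C over hypothetical 32-bit
   halves.  The OR of the two XORs is zero iff both halves match, so one
   compare against zero replaces two branches.  */
static int
double_word_eq_sketch (unsigned int lo0, unsigned int hi0,
		       unsigned int lo1, unsigned int hi1)
{
  return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
}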
17564 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17565 op1 is a constant and the low word is zero, then we can just
17566 examine the high word. Similarly for low word -1 and
17567 less-or-equal-than or greater-than. */
17569 if (CONST_INT_P (hi[1]))
17572 case LT: case LTU: case GE: case GEU:
17573 if (lo[1] == const0_rtx)
17575 ix86_expand_branch (code, hi[0], hi[1], label);
17579 case LE: case LEU: case GT: case GTU:
17580 if (lo[1] == constm1_rtx)
17582 ix86_expand_branch (code, hi[0], hi[1], label);
17590 /* Otherwise, we need two or three jumps. */
17592 label2 = gen_label_rtx ();
17595 code2 = swap_condition (code);
17596 code3 = unsigned_condition (code);
17600 case LT: case GT: case LTU: case GTU:
17603 case LE: code1 = LT; code2 = GT; break;
17604 case GE: code1 = GT; code2 = LT; break;
17605 case LEU: code1 = LTU; code2 = GTU; break;
17606 case GEU: code1 = GTU; code2 = LTU; break;
17608 case EQ: code1 = UNKNOWN; code2 = NE; break;
17609 case NE: code2 = UNKNOWN; break;
17612 gcc_unreachable ();
17617 * if (hi(a) < hi(b)) goto true;
17618 * if (hi(a) > hi(b)) goto false;
17619 * if (lo(a) < lo(b)) goto true;
17623 if (code1 != UNKNOWN)
17624 ix86_expand_branch (code1, hi[0], hi[1], label);
17625 if (code2 != UNKNOWN)
17626 ix86_expand_branch (code2, hi[0], hi[1], label2);
17628 ix86_expand_branch (code3, lo[0], lo[1], label);
17630 if (code2 != UNKNOWN)
17631 emit_label (label2);
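
/* Illustrative sketch (not part of the compiler): the jump structure
   built above, as plain C for a signed double-word a < b with
   hypothetical 32-bit halves.  The high words decide when they differ;
   otherwise the low words are compared unsigned (code3 above).  */
static int
double_word_lt_sketch (int hi0, unsigned int lo0,
		       int hi1, unsigned int lo1)
{
  if (hi0 < hi1)
    return 1;		/* code1: goto true */
  if (hi0 > hi1)
    return 0;		/* code2: goto false (label2) */
  return lo0 < lo1;	/* code3: unsigned compare of low words */
}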
17636 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17641 /* Split branch based on floating point condition. */
17643 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17644 rtx target1, rtx target2, rtx tmp, rtx pushed)
17649 if (target2 != pc_rtx)
17652 code = reverse_condition_maybe_unordered (code);
17657 condition = ix86_expand_fp_compare (code, op1, op2,
17660 /* Remove pushed operand from stack. */
17662 ix86_free_from_memory (GET_MODE (pushed));
17664 i = emit_jump_insn (gen_rtx_SET
17666 gen_rtx_IF_THEN_ELSE (VOIDmode,
17667 condition, target1, target2)));
17668 if (split_branch_probability >= 0)
17669 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17673 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17677 gcc_assert (GET_MODE (dest) == QImode);
17679 ret = ix86_expand_compare (code, op0, op1);
17680 PUT_MODE (ret, QImode);
17681 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17684 /* Expand a comparison setting or clearing the carry flag. Return true
17685 when successful, and set *POP to the comparison operation. */
17687 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17689 enum machine_mode mode =
17690 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17692 /* Do not handle double-mode compares that go through the special path. */
17693 if (mode == (TARGET_64BIT ? TImode : DImode))
17696 if (SCALAR_FLOAT_MODE_P (mode))
17698 rtx compare_op, compare_seq;
17700 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17702 /* Shortcut: the following common codes never translate
17703 into carry-flag compares. */
17704 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17705 || code == ORDERED || code == UNORDERED)
17708 /* These comparisons require the zero flag; swap operands so they won't. */
17709 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17710 && !TARGET_IEEE_FP)
17715 code = swap_condition (code);
17718 /* Try to expand the comparison and verify that we end up with
17719 a carry-flag-based comparison. This fails only when we decide
17720 to expand the comparison using arithmetic instead, which is not
17721 a common scenario. */
17723 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17724 compare_seq = get_insns ();
17727 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17728 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17729 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17731 code = GET_CODE (compare_op);
17733 if (code != LTU && code != GEU)
17736 emit_insn (compare_seq);
17741 if (!INTEGRAL_MODE_P (mode))
17750 /* Convert a==0 into (unsigned)a<1. */
17753 if (op1 != const0_rtx)
17756 code = (code == EQ ? LTU : GEU);
17759 /* Convert a>b into b<a or a>=b-1. */
17762 if (CONST_INT_P (op1))
17764 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17765 /* Bail out on overflow. We could still swap the operands, but
17766 that would force loading the constant into a register. */
17767 if (op1 == const0_rtx
17768 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17770 code = (code == GTU ? GEU : LTU);
17777 code = (code == GTU ? LTU : GEU);
17781 /* Convert a>=0 into (unsigned)a<0x80000000. */
17784 if (mode == DImode || op1 != const0_rtx)
17786 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17787 code = (code == LT ? GEU : LTU);
17791 if (mode == DImode || op1 != constm1_rtx)
17793 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17794 code = (code == LE ? GEU : LTU);
17800 /* Swapping operands may cause a constant to appear as the first operand. */
17801 if (!nonimmediate_operand (op0, VOIDmode))
17803 if (!can_create_pseudo_p ())
17805 op0 = force_reg (mode, op0);
17807 *pop = ix86_expand_compare (code, op0, op1);
17808 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
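
/* Illustrative sketch (not part of the compiler): the integer rewrites
   applied above so the comparison lands in the carry flag (LTU/GEU).
   Each line checks one identity over 32-bit operands; two's complement
   is assumed for the sign-bit case.  */
static int
carry_flag_rewrites_hold_sketch (unsigned int a, unsigned int b)
{
  int eq_ok = (a == 0) == (a < 1u);			/* a==0 -> a<1 */
  int gt_ok = (a > b) == (b < a);			/* a>b  -> b<a */
  int ge_ok = ((int) a >= 0) == (a < 0x80000000u);	/* a>=0 -> a<2^31 */
  return eq_ok && gt_ok && ge_ok;
}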
17813 ix86_expand_int_movcc (rtx operands[])
17815 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17816 rtx compare_seq, compare_op;
17817 enum machine_mode mode = GET_MODE (operands[0]);
17818 bool sign_bit_compare_p = false;
17819 rtx op0 = XEXP (operands[1], 0);
17820 rtx op1 = XEXP (operands[1], 1);
17823 compare_op = ix86_expand_compare (code, op0, op1);
17824 compare_seq = get_insns ();
17827 compare_code = GET_CODE (compare_op);
17829 if ((op1 == const0_rtx && (code == GE || code == LT))
17830 || (op1 == constm1_rtx && (code == GT || code == LE)))
17831 sign_bit_compare_p = true;
17833 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17834 HImode insns, we'd be swallowed in word prefix ops. */
17836 if ((mode != HImode || TARGET_FAST_PREFIX)
17837 && (mode != (TARGET_64BIT ? TImode : DImode))
17838 && CONST_INT_P (operands[2])
17839 && CONST_INT_P (operands[3]))
17841 rtx out = operands[0];
17842 HOST_WIDE_INT ct = INTVAL (operands[2]);
17843 HOST_WIDE_INT cf = INTVAL (operands[3]);
17844 HOST_WIDE_INT diff;
17847 /* Sign-bit compares are better done using shifts than using sbb. */
17849 if (sign_bit_compare_p
17850 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17852 /* Detect overlap between destination and compare sources. */
17855 if (!sign_bit_compare_p)
17858 bool fpcmp = false;
17860 compare_code = GET_CODE (compare_op);
17862 flags = XEXP (compare_op, 0);
17864 if (GET_MODE (flags) == CCFPmode
17865 || GET_MODE (flags) == CCFPUmode)
17869 = ix86_fp_compare_code_to_integer (compare_code);
17872 /* To simplify the rest of the code, restrict to the GEU case. */
17873 if (compare_code == LTU)
17875 HOST_WIDE_INT tmp = ct;
17878 compare_code = reverse_condition (compare_code);
17879 code = reverse_condition (code);
17884 PUT_CODE (compare_op,
17885 reverse_condition_maybe_unordered
17886 (GET_CODE (compare_op)));
17888 PUT_CODE (compare_op,
17889 reverse_condition (GET_CODE (compare_op)));
17893 if (reg_overlap_mentioned_p (out, op0)
17894 || reg_overlap_mentioned_p (out, op1))
17895 tmp = gen_reg_rtx (mode);
17897 if (mode == DImode)
17898 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17900 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17901 flags, compare_op));
17905 if (code == GT || code == GE)
17906 code = reverse_condition (code);
17909 HOST_WIDE_INT tmp = ct;
17914 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17927 tmp = expand_simple_binop (mode, PLUS,
17929 copy_rtx (tmp), 1, OPTAB_DIRECT);
17940 tmp = expand_simple_binop (mode, IOR,
17942 copy_rtx (tmp), 1, OPTAB_DIRECT);
17944 else if (diff == -1 && ct)
17954 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17956 tmp = expand_simple_binop (mode, PLUS,
17957 copy_rtx (tmp), GEN_INT (cf),
17958 copy_rtx (tmp), 1, OPTAB_DIRECT);
17966 * andl cf - ct, dest
17976 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17979 tmp = expand_simple_binop (mode, AND,
17981 gen_int_mode (cf - ct, mode),
17982 copy_rtx (tmp), 1, OPTAB_DIRECT);
17984 tmp = expand_simple_binop (mode, PLUS,
17985 copy_rtx (tmp), GEN_INT (ct),
17986 copy_rtx (tmp), 1, OPTAB_DIRECT);
17989 if (!rtx_equal_p (tmp, out))
17990 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
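
/* Illustrative sketch (not part of the compiler): what the sbb-based
   sequence above computes.  The movcc_0_m1 pattern materializes the
   carry flag as an all-zeros/all-ones mask (sbb reg,reg), and the
   constant select then needs only and/add, no branch.  Plain C, with
   the comparison restricted to GEU as above.  */
static int
select_by_carry_sketch (unsigned int a, unsigned int b, int ct, int cf)
{
  int mask = (a >= b) ? -1 : 0;		/* cmp; sbb reg,reg; not */
  return (mask & (ct - cf)) + cf;	/* andl; addl */
}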
17997 enum machine_mode cmp_mode = GET_MODE (op0);
18000 tmp = ct, ct = cf, cf = tmp;
18003 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18005 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18007 /* We may be reversing an unordered compare to a normal compare, which
18008 is not valid in general (we may convert a non-trapping condition
18009 to a trapping one); however, on i386 we currently emit all
18010 comparisons unordered. */
18011 compare_code = reverse_condition_maybe_unordered (compare_code);
18012 code = reverse_condition_maybe_unordered (code);
18016 compare_code = reverse_condition (compare_code);
18017 code = reverse_condition (code);
18021 compare_code = UNKNOWN;
18022 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18023 && CONST_INT_P (op1))
18025 if (op1 == const0_rtx
18026 && (code == LT || code == GE))
18027 compare_code = code;
18028 else if (op1 == constm1_rtx)
18032 else if (code == GT)
18037 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18038 if (compare_code != UNKNOWN
18039 && GET_MODE (op0) == GET_MODE (out)
18040 && (cf == -1 || ct == -1))
18042 /* If lea code below could be used, only optimize
18043 if it results in a 2 insn sequence. */
18045 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18046 || diff == 3 || diff == 5 || diff == 9)
18047 || (compare_code == LT && ct == -1)
18048 || (compare_code == GE && cf == -1))
18051 * notl op1 (if necessary)
18059 code = reverse_condition (code);
18062 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18064 out = expand_simple_binop (mode, IOR,
18066 out, 1, OPTAB_DIRECT);
18067 if (out != operands[0])
18068 emit_move_insn (operands[0], out);
18075 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18076 || diff == 3 || diff == 5 || diff == 9)
18077 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18079 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18085 * lea cf(dest*(ct-cf)),dest
18089 * This also catches the degenerate setcc-only case.
18095 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18098 /* On x86_64 the lea instruction operates on Pmode, so we need
18099 to get arithmetics done in proper mode to match. */
18101 tmp = copy_rtx (out);
18105 out1 = copy_rtx (out);
18106 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18110 tmp = gen_rtx_PLUS (mode, tmp, out1);
18116 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18119 if (!rtx_equal_p (tmp, out))
18122 out = force_operand (tmp, copy_rtx (out));
18124 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18126 if (!rtx_equal_p (out, operands[0]))
18127 emit_move_insn (operands[0], copy_rtx (out));
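
/* Illustrative sketch (not part of the compiler): the lea form used
   above.  With diff = ct - cf in {1, 2, 3, 4, 5, 8, 9}, base + index *
   scale addressing lets a single lea compute cf + bit * diff from the
   setcc result, e.g. lea cf(%eax,%eax,2) for diff == 3.  Plain C.  */
static int
select_by_lea_sketch (int cond, int ct, int cf)
{
  int bit = cond != 0;		/* setcc: 0 or 1 */
  return cf + bit * (ct - cf);	/* one lea (or lea plus add) */
}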
18133 * General case: Jumpful:
18134 * xorl dest,dest cmpl op1, op2
18135 * cmpl op1, op2 movl ct, dest
18136 * setcc dest jcc 1f
18137 * decl dest movl cf, dest
18138 * andl (cf-ct),dest 1:
18141 * Size 20. Size 14.
18143 * This is reasonably steep, but branch mispredict costs are
18144 * high on modern cpus, so consider failing only if optimizing for space. */
18148 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18149 && BRANCH_COST (optimize_insn_for_speed_p (),
18154 enum machine_mode cmp_mode = GET_MODE (op0);
18159 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18161 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18163 /* We may be reversing an unordered compare to a normal compare,
18164 which is not valid in general (we may convert a non-trapping
18165 condition to a trapping one); however, on i386 we currently
18166 emit all comparisons unordered. */
18167 code = reverse_condition_maybe_unordered (code);
18171 code = reverse_condition (code);
18172 if (compare_code != UNKNOWN)
18173 compare_code = reverse_condition (compare_code);
18177 if (compare_code != UNKNOWN)
18179 /* notl op1 (if needed)
18184 For x < 0 (resp. x <= -1) there will be no notl,
18185 so if possible swap the constants to get rid of the complement.
18187 True/false will be -1/0 while the code below (store flag
18188 followed by decrement) is 0/-1, so the constants need
18189 to be exchanged once more. */
18191 if (compare_code == GE || !cf)
18193 code = reverse_condition (code);
18198 HOST_WIDE_INT tmp = cf;
18203 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18207 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18209 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18211 copy_rtx (out), 1, OPTAB_DIRECT);
18214 out = expand_simple_binop (mode, AND, copy_rtx (out),
18215 gen_int_mode (cf - ct, mode),
18216 copy_rtx (out), 1, OPTAB_DIRECT);
18218 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18219 copy_rtx (out), 1, OPTAB_DIRECT);
18220 if (!rtx_equal_p (out, operands[0]))
18221 emit_move_insn (operands[0], copy_rtx (out));
18227 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18229 /* Try a few things more with specific constants and a variable. */
18232 rtx var, orig_out, out, tmp;
18234 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18237 /* If one of the two operands is an interesting constant, load a
18238 constant with the above and mask it in with a logical operation. */
18240 if (CONST_INT_P (operands[2]))
18243 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18244 operands[3] = constm1_rtx, op = and_optab;
18245 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18246 operands[3] = const0_rtx, op = ior_optab;
18250 else if (CONST_INT_P (operands[3]))
18253 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18254 operands[2] = constm1_rtx, op = and_optab;
18255 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18256 operands[2] = const0_rtx, op = ior_optab;
18263 orig_out = operands[0];
18264 tmp = gen_reg_rtx (mode);
18267 /* Recurse to get the constant loaded. */
18268 if (ix86_expand_int_movcc (operands) == 0)
18271 /* Mask in the interesting variable. */
18272 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18274 if (!rtx_equal_p (out, orig_out))
18275 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18281 * For comparison with above,
18291 if (! nonimmediate_operand (operands[2], mode))
18292 operands[2] = force_reg (mode, operands[2]);
18293 if (! nonimmediate_operand (operands[3], mode))
18294 operands[3] = force_reg (mode, operands[3]);
18296 if (! register_operand (operands[2], VOIDmode)
18298 || ! register_operand (operands[3], VOIDmode)))
18299 operands[2] = force_reg (mode, operands[2]);
18302 && ! register_operand (operands[3], VOIDmode))
18303 operands[3] = force_reg (mode, operands[3]);
18305 emit_insn (compare_seq);
18306 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18307 gen_rtx_IF_THEN_ELSE (mode,
18308 compare_op, operands[2],
18313 /* Swap, force into registers, or otherwise massage the two operands
18314 to an sse comparison with a mask result. Thus we differ a bit from
18315 ix86_prepare_fp_compare_args which expects to produce a flags result.
18317 The DEST operand exists to help determine whether to commute commutative
18318 operators. The POP0/POP1 operands are updated in place. The new
18319 comparison code is returned, or UNKNOWN if not implementable. */
18321 static enum rtx_code
18322 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18323 rtx *pop0, rtx *pop1)
18331 /* We have no LTGT as an operator. We could implement it with
18332 NE & ORDERED, but this requires an extra temporary. It's
18333 not clear that it's worth it. */
18340 /* These are supported directly. */
18347 /* For commutative operators, try to canonicalize the destination
18348 operand to be first in the comparison - this helps reload to
18349 avoid extra moves. */
18350 if (!dest || !rtx_equal_p (dest, *pop1))
18358 /* These are not supported directly. Swap the comparison operands
18359 to transform into something that is supported. */
18363 code = swap_condition (code);
18367 gcc_unreachable ();
18373 /* Detect conditional moves that exactly match min/max operational
18374 semantics. Note that this is IEEE safe, as long as we don't
18375 interchange the operands.
18377 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18378 and TRUE if the operation is successful and instructions are emitted. */
18381 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18382 rtx cmp_op1, rtx if_true, rtx if_false)
18384 enum machine_mode mode;
18390 else if (code == UNGE)
18393 if_true = if_false;
18399 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18401 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18406 mode = GET_MODE (dest);
18408 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18409 but MODE may be a vector mode and thus not appropriate. */
18410 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18412 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18415 if_true = force_reg (mode, if_true);
18416 v = gen_rtvec (2, if_true, if_false);
18417 tmp = gen_rtx_UNSPEC (mode, v, u);
18421 code = is_min ? SMIN : SMAX;
18422 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18425 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
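
/* Illustrative sketch (not part of the compiler): why operand order
   matters above.  SSE minss behaves like the C expression below: when
   the compare is false -- including the unordered (NaN) case and
   -0.0 vs +0.0, which compare equal -- the second operand is returned,
   so interchanging the operands changes the result.  */
static double
sse_min_model_sketch (double src1, double src2)
{
  return src1 < src2 ? src1 : src2;	/* NaN in either => src2 */
}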
18429 /* Expand an sse vector comparison. Return the register with the result. */
18432 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18433 rtx op_true, rtx op_false)
18435 enum machine_mode mode = GET_MODE (dest);
18438 cmp_op0 = force_reg (mode, cmp_op0);
18439 if (!nonimmediate_operand (cmp_op1, mode))
18440 cmp_op1 = force_reg (mode, cmp_op1);
18443 || reg_overlap_mentioned_p (dest, op_true)
18444 || reg_overlap_mentioned_p (dest, op_false))
18445 dest = gen_reg_rtx (mode);
18447 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18448 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18453 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18454 operations. This is used for both scalar and vector conditional moves. */
18457 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18459 enum machine_mode mode = GET_MODE (dest);
18462 if (op_false == CONST0_RTX (mode))
18464 op_true = force_reg (mode, op_true);
18465 x = gen_rtx_AND (mode, cmp, op_true);
18466 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18468 else if (op_true == CONST0_RTX (mode))
18470 op_false = force_reg (mode, op_false);
18471 x = gen_rtx_NOT (mode, cmp);
18472 x = gen_rtx_AND (mode, x, op_false);
18473 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18475 else if (TARGET_XOP)
18477 rtx pcmov = gen_rtx_SET (mode, dest,
18478 gen_rtx_IF_THEN_ELSE (mode, cmp,
18485 op_true = force_reg (mode, op_true);
18486 op_false = force_reg (mode, op_false);
18488 t2 = gen_reg_rtx (mode);
18490 t3 = gen_reg_rtx (mode);
18494 x = gen_rtx_AND (mode, op_true, cmp);
18495 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18497 x = gen_rtx_NOT (mode, cmp);
18498 x = gen_rtx_AND (mode, x, op_false);
18499 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18501 x = gen_rtx_IOR (mode, t3, t2);
18502 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
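
/* Illustrative sketch (not part of the compiler): the and/andn/ior
   blend emitted above, on one hypothetical 32-bit element.  With a
   compare result that is all-ones or all-zeros per element, the
   selection is branch-free.  */
static unsigned int
mask_blend_sketch (unsigned int cmp, unsigned int op_true,
		   unsigned int op_false)
{
  return (cmp & op_true) | (~cmp & op_false);
}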
18506 /* Expand a floating-point conditional move. Return true if successful. */
18509 ix86_expand_fp_movcc (rtx operands[])
18511 enum machine_mode mode = GET_MODE (operands[0]);
18512 enum rtx_code code = GET_CODE (operands[1]);
18513 rtx tmp, compare_op;
18514 rtx op0 = XEXP (operands[1], 0);
18515 rtx op1 = XEXP (operands[1], 1);
18517 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18519 enum machine_mode cmode;
18521 /* Since we've no cmove for sse registers, don't force bad register
18522 allocation just to gain access to it. Deny movcc when the
18523 comparison mode doesn't match the move mode. */
18524 cmode = GET_MODE (op0);
18525 if (cmode == VOIDmode)
18526 cmode = GET_MODE (op1);
18530 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18531 if (code == UNKNOWN)
18534 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18535 operands[2], operands[3]))
18538 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18539 operands[2], operands[3]);
18540 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18544 /* The floating point conditional move instructions don't directly
18545 support conditions resulting from a signed integer comparison. */
18547 compare_op = ix86_expand_compare (code, op0, op1);
18548 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18550 tmp = gen_reg_rtx (QImode);
18551 ix86_expand_setcc (tmp, code, op0, op1);
18553 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18556 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18557 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18558 operands[2], operands[3])));
18563 /* Expand a floating-point vector conditional move; a vcond operation
18564 rather than a movcc operation. */
18567 ix86_expand_fp_vcond (rtx operands[])
18569 enum rtx_code code = GET_CODE (operands[3]);
18572 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18573 &operands[4], &operands[5]);
18574 if (code == UNKNOWN)
18577 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18578 operands[5], operands[1], operands[2]))
18581 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18582 operands[1], operands[2]);
18583 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18587 /* Expand a signed/unsigned integral vector conditional move. */
18590 ix86_expand_int_vcond (rtx operands[])
18592 enum machine_mode mode = GET_MODE (operands[0]);
18593 enum rtx_code code = GET_CODE (operands[3]);
18594 bool negate = false;
18597 cop0 = operands[4];
18598 cop1 = operands[5];
18600 /* XOP supports all of the comparisons on all vector int types. */
18603 /* Canonicalize the comparison to EQ, GT, GTU. */
18614 code = reverse_condition (code);
18620 code = reverse_condition (code);
18626 code = swap_condition (code);
18627 x = cop0, cop0 = cop1, cop1 = x;
18631 gcc_unreachable ();
18634 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18635 if (mode == V2DImode)
18640 /* SSE4.1 supports EQ. */
18641 if (!TARGET_SSE4_1)
18647 /* SSE4.2 supports GT/GTU. */
18648 if (!TARGET_SSE4_2)
18653 gcc_unreachable ();
18657 /* Unsigned parallel compare is not supported by the hardware.
18658 Play some tricks to turn this into a signed comparison against 0. */
18662 cop0 = force_reg (mode, cop0);
18670 rtx (*gen_sub3) (rtx, rtx, rtx);
18672 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
18674 mask = ix86_build_signbit_mask (mode, true, false);
18675 gen_sub3 = (mode == V4SImode
18676 ? gen_subv4si3 : gen_subv2di3);
18677 t1 = gen_reg_rtx (mode);
18678 emit_insn (gen_sub3 (t1, cop0, mask));
18680 t2 = gen_reg_rtx (mode);
18681 emit_insn (gen_sub3 (t2, cop1, mask));
18691 /* Perform a parallel unsigned saturating subtraction. */
18692 x = gen_reg_rtx (mode);
18693 emit_insn (gen_rtx_SET (VOIDmode, x,
18694 gen_rtx_US_MINUS (mode, cop0, cop1)));
18697 cop1 = CONST0_RTX (mode);
18703 gcc_unreachable ();
18708 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18709 operands[1+negate], operands[2-negate]);
18711 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18712 operands[2-negate]);
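
/* Illustrative sketch (not part of the compiler): the unsigned-to-signed
   trick used above when no unsigned vector compare exists.  Flipping the
   sign bit of both operands (subtracting the sign-bit mask is the same
   thing modulo 2^32) turns unsigned ordering into signed ordering, after
   which the signed greater-than compare applies.  Plain C per element.  */
static int
unsigned_gt_via_signed_sketch (unsigned int a, unsigned int b)
{
  int sa = (int) (a ^ 0x80000000u);	/* subtract/xor the sign mask */
  int sb = (int) (b ^ 0x80000000u);
  return sa > sb;			/* equals a > b unsigned */
}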
18716 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18717 true if we should do zero extension, else sign extension. HIGH_P is
18718 true if we want the N/2 high elements, else the low elements. */
18721 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18723 enum machine_mode imode = GET_MODE (operands[1]);
18728 rtx (*unpack)(rtx, rtx);
18734 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18736 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18740 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18742 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18746 unpack = gen_sse4_1_zero_extendv2siv2di2;
18748 unpack = gen_sse4_1_sign_extendv2siv2di2;
18751 gcc_unreachable ();
18756 /* Shift higher 8 bytes to lower 8 bytes. */
18757 tmp = gen_reg_rtx (imode);
18758 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
18759 gen_lowpart (V1TImode, operands[1]),
18765 emit_insn (unpack (operands[0], tmp));
18769 rtx (*unpack)(rtx, rtx, rtx);
18775 unpack = gen_vec_interleave_highv16qi;
18777 unpack = gen_vec_interleave_lowv16qi;
18781 unpack = gen_vec_interleave_highv8hi;
18783 unpack = gen_vec_interleave_lowv8hi;
18787 unpack = gen_vec_interleave_highv4si;
18789 unpack = gen_vec_interleave_lowv4si;
18792 gcc_unreachable ();
18795 dest = gen_lowpart (imode, operands[0]);
18798 tmp = force_reg (imode, CONST0_RTX (imode));
18800 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18801 operands[1], pc_rtx, pc_rtx);
18803 emit_insn (unpack (dest, operands[1], tmp));
18807 /* Expand conditional increment or decrement using adc/sbb instructions.
18808 The default case using setcc followed by the conditional move can be
18809 done by generic code. */
18811 ix86_expand_int_addcc (rtx operands[])
18813 enum rtx_code code = GET_CODE (operands[1]);
18815 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18817 rtx val = const0_rtx;
18818 bool fpcmp = false;
18819 enum machine_mode mode;
18820 rtx op0 = XEXP (operands[1], 0);
18821 rtx op1 = XEXP (operands[1], 1);
18823 if (operands[3] != const1_rtx
18824 && operands[3] != constm1_rtx)
18826 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18828 code = GET_CODE (compare_op);
18830 flags = XEXP (compare_op, 0);
18832 if (GET_MODE (flags) == CCFPmode
18833 || GET_MODE (flags) == CCFPUmode)
18836 code = ix86_fp_compare_code_to_integer (code);
18843 PUT_CODE (compare_op,
18844 reverse_condition_maybe_unordered
18845 (GET_CODE (compare_op)));
18847 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18850 mode = GET_MODE (operands[0]);
18852 /* Construct either adc or sbb insn. */
18853 if ((code == LTU) == (operands[3] == constm1_rtx))
18858 insn = gen_subqi3_carry;
18861 insn = gen_subhi3_carry;
18864 insn = gen_subsi3_carry;
18867 insn = gen_subdi3_carry;
18870 gcc_unreachable ();
18878 insn = gen_addqi3_carry;
18881 insn = gen_addhi3_carry;
18884 insn = gen_addsi3_carry;
18887 insn = gen_adddi3_carry;
18890 gcc_unreachable ();
18893 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
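
/* Illustrative sketch (not part of the compiler): the adc form of a
   conditional increment built above.  Once the condition sits in the
   carry flag, add-with-carry folds the flag into the sum, so
   dest = src + (a < b) needs no setcc or cmov; sbb gives the decrement
   case.  Plain C.  */
static unsigned int
cond_increment_sketch (unsigned int src, unsigned int a, unsigned int b)
{
  return src + (a < b);	/* the compare sets CF; adc adds the flag */
}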
18899 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
18900 but works for floating-point parameters and non-offsettable memories.
18901 For pushes, it returns just stack offsets; the values will be saved
18902 in the right order. At most four parts are generated. */
18905 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18910 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18912 size = (GET_MODE_SIZE (mode) + 4) / 8;
18914 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18915 gcc_assert (size >= 2 && size <= 4);
18917 /* Optimize constant pool references to immediates. This is used by fp
18918 moves that force all constants to memory to allow combining. */
18919 if (MEM_P (operand) && MEM_READONLY_P (operand))
18921 rtx tmp = maybe_get_pool_constant (operand);
18926 if (MEM_P (operand) && !offsettable_memref_p (operand))
18928 /* The only non-offsettable memories we handle are pushes. */
18929 int ok = push_operand (operand, VOIDmode);
18933 operand = copy_rtx (operand);
18934 PUT_MODE (operand, Pmode);
18935 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18939 if (GET_CODE (operand) == CONST_VECTOR)
18941 enum machine_mode imode = int_mode_for_mode (mode);
18942 /* Caution: if we looked through a constant pool memory above,
18943 the operand may actually have a different mode now. That's
18944 ok, since we want to pun this all the way back to an integer. */
18945 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18946 gcc_assert (operand != NULL);
18952 if (mode == DImode)
18953 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18958 if (REG_P (operand))
18960 gcc_assert (reload_completed);
18961 for (i = 0; i < size; i++)
18962 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18964 else if (offsettable_memref_p (operand))
18966 operand = adjust_address (operand, SImode, 0);
18967 parts[0] = operand;
18968 for (i = 1; i < size; i++)
18969 parts[i] = adjust_address (operand, SImode, 4 * i);
18971 else if (GET_CODE (operand) == CONST_DOUBLE)
18976 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18980 real_to_target (l, &r, mode);
18981 parts[3] = gen_int_mode (l[3], SImode);
18982 parts[2] = gen_int_mode (l[2], SImode);
18985 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18986 parts[2] = gen_int_mode (l[2], SImode);
18989 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18992 gcc_unreachable ();
18994 parts[1] = gen_int_mode (l[1], SImode);
18995 parts[0] = gen_int_mode (l[0], SImode);
18998 gcc_unreachable ();
19003 if (mode == TImode)
19004 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19005 if (mode == XFmode || mode == TFmode)
19007 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
19008 if (REG_P (operand))
19010 gcc_assert (reload_completed);
19011 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19012 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19014 else if (offsettable_memref_p (operand))
19016 operand = adjust_address (operand, DImode, 0);
19017 parts[0] = operand;
19018 parts[1] = adjust_address (operand, upper_mode, 8);
19020 else if (GET_CODE (operand) == CONST_DOUBLE)
19025 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19026 real_to_target (l, &r, mode);
19028 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19029 if (HOST_BITS_PER_WIDE_INT >= 64)
19032 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19033 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19036 parts[0] = immed_double_const (l[0], l[1], DImode);
19038 if (upper_mode == SImode)
19039 parts[1] = gen_int_mode (l[2], SImode);
19040 else if (HOST_BITS_PER_WIDE_INT >= 64)
19043 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19044 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19047 parts[1] = immed_double_const (l[2], l[3], DImode);
19050 gcc_unreachable ();
19057 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19058 Return false when normal moves are needed; true when all required
19059 insns have been emitted. Operands 2-4 contain the input values
19060 in the correct order; operands 5-7 contain the output values. */
19063 ix86_split_long_move (rtx operands[])
19068 int collisions = 0;
19069 enum machine_mode mode = GET_MODE (operands[0]);
19070 bool collisionparts[4];
19072 /* The DFmode expanders may ask us to move a double.
19073 For a 64-bit target this is a single move. By hiding that fact
19074 here we simplify the i386.md splitters. */
19075 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19077 /* Optimize constant pool reference to immediates. This is used by
19078 fp moves, that force all constants to memory to allow combining. */
19080 if (MEM_P (operands[1])
19081 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19082 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19083 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19084 if (push_operand (operands[0], VOIDmode))
19086 operands[0] = copy_rtx (operands[0]);
19087 PUT_MODE (operands[0], Pmode);
19090 operands[0] = gen_lowpart (DImode, operands[0]);
19091 operands[1] = gen_lowpart (DImode, operands[1]);
19092 emit_move_insn (operands[0], operands[1]);
19096 /* The only non-offsettable memory we handle is push. */
19097 if (push_operand (operands[0], VOIDmode))
19100 gcc_assert (!MEM_P (operands[0])
19101 || offsettable_memref_p (operands[0]));
19103 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19104 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19106 /* When emitting a push, take care with source operands on the stack. */
19107 if (push && MEM_P (operands[1])
19108 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19110 rtx src_base = XEXP (part[1][nparts - 1], 0);
19112 /* Compensate for the stack decrement by 4. */
19113 if (!TARGET_64BIT && nparts == 3
19114 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19115 src_base = plus_constant (src_base, 4);
19117 /* src_base refers to the stack pointer and is
19118 automatically decreased by emitted push. */
19119 for (i = 0; i < nparts; i++)
19120 part[1][i] = change_address (part[1][i],
19121 GET_MODE (part[1][i]), src_base);
19124 /* We need to do the copy in the right order in case an address register
19125 of the source overlaps the destination. */
19126 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19130 for (i = 0; i < nparts; i++)
19133 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19134 if (collisionparts[i])
19138 /* Collision in the middle part can be handled by reordering. */
19139 if (collisions == 1 && nparts == 3 && collisionparts [1])
19141 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19142 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19144 else if (collisions == 1
19146 && (collisionparts [1] || collisionparts [2]))
19148 if (collisionparts [1])
19150 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19151 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19155 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19156 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19160 /* If there are more collisions, we can't handle it by reordering.
19161 Do an lea to the last part and use only one colliding move. */
19162 else if (collisions > 1)
19168 base = part[0][nparts - 1];
19170 /* Handle the case when the last part isn't valid for lea.
19171 Happens in 64-bit mode storing the 12-byte XFmode. */
19172 if (GET_MODE (base) != Pmode)
19173 base = gen_rtx_REG (Pmode, REGNO (base));
19175 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19176 part[1][0] = replace_equiv_address (part[1][0], base);
19177 for (i = 1; i < nparts; i++)
19179 tmp = plus_constant (base, UNITS_PER_WORD * i);
19180 part[1][i] = replace_equiv_address (part[1][i], tmp);
19191 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19192 emit_insn (gen_addsi3 (stack_pointer_rtx,
19193 stack_pointer_rtx, GEN_INT (-4)));
19194 emit_move_insn (part[0][2], part[1][2]);
19196 else if (nparts == 4)
19198 emit_move_insn (part[0][3], part[1][3]);
19199 emit_move_insn (part[0][2], part[1][2]);
19204 /* In 64-bit mode we don't have a 32-bit push available. If this is a
19205 register, that is OK - we will just use the larger counterpart. We also
19206 retype memory - this comes from an attempt to avoid the REX prefix
19207 when moving the second half of a TFmode value. */
19208 if (GET_MODE (part[1][1]) == SImode)
19210 switch (GET_CODE (part[1][1]))
19213 part[1][1] = adjust_address (part[1][1], DImode, 0);
19217 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19221 gcc_unreachable ();
19224 if (GET_MODE (part[1][0]) == SImode)
19225 part[1][0] = part[1][1];
19228 emit_move_insn (part[0][1], part[1][1]);
19229 emit_move_insn (part[0][0], part[1][0]);
19233 /* Choose the correct order so as not to overwrite the source before it is copied. */
19234 if ((REG_P (part[0][0])
19235 && REG_P (part[1][1])
19236 && (REGNO (part[0][0]) == REGNO (part[1][1])
19238 && REGNO (part[0][0]) == REGNO (part[1][2]))
19240 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19242 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19244 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19246 operands[2 + i] = part[0][j];
19247 operands[6 + i] = part[1][j];
19252 for (i = 0; i < nparts; i++)
19254 operands[2 + i] = part[0][i];
19255 operands[6 + i] = part[1][i];
19259 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19260 if (optimize_insn_for_size_p ())
19262 for (j = 0; j < nparts - 1; j++)
19263 if (CONST_INT_P (operands[6 + j])
19264 && operands[6 + j] != const0_rtx
19265 && REG_P (operands[2 + j]))
19266 for (i = j; i < nparts - 1; i++)
19267 if (CONST_INT_P (operands[7 + i])
19268 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19269 operands[7 + i] = operands[2 + j];
19272 for (i = 0; i < nparts; i++)
19273 emit_move_insn (operands[2 + i], operands[6 + i]);
19278 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19279 left shift by a constant, either using a single shift or
19280 a sequence of add instructions. */
19283 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19285 rtx (*insn)(rtx, rtx, rtx);
19288 || (count * ix86_cost->add <= ix86_cost->shift_const
19289 && !optimize_insn_for_size_p ()))
19291 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19292 while (count-- > 0)
19293 emit_insn (insn (operand, operand, operand));
19297 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19298 emit_insn (insn (operand, operand, GEN_INT (count)));
19303 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19305 rtx (*gen_ashl3)(rtx, rtx, rtx);
19306 rtx (*gen_shld)(rtx, rtx, rtx);
19307 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19309 rtx low[2], high[2];
19312 if (CONST_INT_P (operands[2]))
19314 split_double_mode (mode, operands, 2, low, high);
19315 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19317 if (count >= half_width)
19319 emit_move_insn (high[0], low[1]);
19320 emit_move_insn (low[0], const0_rtx);
19322 if (count > half_width)
19323 ix86_expand_ashl_const (high[0], count - half_width, mode);
19327 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19329 if (!rtx_equal_p (operands[0], operands[1]))
19330 emit_move_insn (operands[0], operands[1]);
19332 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19333 ix86_expand_ashl_const (low[0], count, mode);
19338 split_double_mode (mode, operands, 1, low, high);
19340 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19342 if (operands[1] == const1_rtx)
19344 /* Assuming we've chosen QImode-capable registers, 1 << N
19345 can be done with two 32/64-bit shifts, no branches, no cmoves. */
19346 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19348 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19350 ix86_expand_clear (low[0]);
19351 ix86_expand_clear (high[0]);
19352 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19354 d = gen_lowpart (QImode, low[0]);
19355 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19356 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19357 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19359 d = gen_lowpart (QImode, high[0]);
19360 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19361 s = gen_rtx_NE (QImode, flags, const0_rtx);
19362 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19365 /* Otherwise, we can get the same results by manually performing
19366 a bit extract operation on bit 5/6, and then performing the two
19367 shifts. The two methods of getting 0/1 into low/high are exactly
19368 the same size. Avoiding the shift in the bit extract case helps
19369 pentium4 a bit; no one else seems to care much either way. */
19372 enum machine_mode half_mode;
19373 rtx (*gen_lshr3)(rtx, rtx, rtx);
19374 rtx (*gen_and3)(rtx, rtx, rtx);
19375 rtx (*gen_xor3)(rtx, rtx, rtx);
19376 HOST_WIDE_INT bits;
19379 if (mode == DImode)
19381 half_mode = SImode;
19382 gen_lshr3 = gen_lshrsi3;
19383 gen_and3 = gen_andsi3;
19384 gen_xor3 = gen_xorsi3;
19389 half_mode = DImode;
19390 gen_lshr3 = gen_lshrdi3;
19391 gen_and3 = gen_anddi3;
19392 gen_xor3 = gen_xordi3;
19396 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19397 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19399 x = gen_lowpart (half_mode, operands[2]);
19400 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19402 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19403 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19404 emit_move_insn (low[0], high[0]);
19405 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19408 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19409 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
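
/* Illustrative sketch (not part of the compiler): the branch-free
   1 << N double-word shift above, for a hypothetical 32-bit target.
   Bit 5 of the count tells which half receives the 1; hardware shifts
   already reduce the count mod 32, so both halves are then shifted
   unconditionally.  */
static void
one_shl_n_sketch (unsigned int n, unsigned int *lo, unsigned int *hi)
{
  unsigned int bit = (n >> 5) & 1;	/* shr; and: is n >= 32?  */
  *lo = (bit ^ 1) << (n & 31);		/* low gets the 1 when n < 32 */
  *hi = bit << (n & 31);		/* high gets it when n >= 32 */
}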
19413 if (operands[1] == constm1_rtx)
19415 /* For -1 << N, we can avoid the shld instruction, because we
19416 know that we're shifting 0...31/63 ones into a -1. */
19417 emit_move_insn (low[0], constm1_rtx);
19418 if (optimize_insn_for_size_p ())
19419 emit_move_insn (high[0], low[0]);
19421 emit_move_insn (high[0], constm1_rtx);
19425 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19427 if (!rtx_equal_p (operands[0], operands[1]))
19428 emit_move_insn (operands[0], operands[1]);
19430 split_double_mode (mode, operands, 1, low, high);
19431 emit_insn (gen_shld (high[0], low[0], operands[2]));
19434 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19436 if (TARGET_CMOVE && scratch)
19438 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19439 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19441 ix86_expand_clear (scratch);
19442 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19446 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19447 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19449 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19454 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19456 rtx (*gen_ashr3)(rtx, rtx, rtx)
19457 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19458 rtx (*gen_shrd)(rtx, rtx, rtx);
19459 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19461 rtx low[2], high[2];
19464 if (CONST_INT_P (operands[2]))
19466 split_double_mode (mode, operands, 2, low, high);
19467 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19469 if (count == GET_MODE_BITSIZE (mode) - 1)
19471 emit_move_insn (high[0], high[1]);
19472 emit_insn (gen_ashr3 (high[0], high[0],
19473 GEN_INT (half_width - 1)));
19474 emit_move_insn (low[0], high[0]);
19477 else if (count >= half_width)
19479 emit_move_insn (low[0], high[1]);
19480 emit_move_insn (high[0], low[0]);
19481 emit_insn (gen_ashr3 (high[0], high[0],
19482 GEN_INT (half_width - 1)));
19484 if (count > half_width)
19485 emit_insn (gen_ashr3 (low[0], low[0],
19486 GEN_INT (count - half_width)));
19490 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19492 if (!rtx_equal_p (operands[0], operands[1]))
19493 emit_move_insn (operands[0], operands[1]);
19495 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19496 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
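
/* Illustrative sketch (not part of the compiler): what the shrd/sar
   pair above computes for a constant 0 < count < 32 on a 32-bit
   target.  shrd funnels low bits of the high half into the low half;
   sar then shifts the high half with sign fill (a two's complement,
   sign-propagating right shift is assumed).  */
static void
double_word_ashr_sketch (unsigned int n, unsigned int *lo, int *hi)
{
  *lo = (*lo >> n) | ((unsigned int) *hi << (32 - n));	/* shrd */
  *hi = *hi >> n;					/* sar */
}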
19501 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19503 if (!rtx_equal_p (operands[0], operands[1]))
19504 emit_move_insn (operands[0], operands[1]);
19506 split_double_mode (mode, operands, 1, low, high);
19508 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19509 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19511 if (TARGET_CMOVE && scratch)
19513 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19514 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19516 emit_move_insn (scratch, high[0]);
19517 emit_insn (gen_ashr3 (scratch, scratch,
19518 GEN_INT (half_width - 1)));
19519 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19524 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19525 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19527 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19533 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19535 rtx (*gen_lshr3)(rtx, rtx, rtx)
19536 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19537 rtx (*gen_shrd)(rtx, rtx, rtx);
19538 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19540 rtx low[2], high[2];
19543 if (CONST_INT_P (operands[2]))
19545 split_double_mode (mode, operands, 2, low, high);
19546 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19548 if (count >= half_width)
19550 emit_move_insn (low[0], high[1]);
19551 ix86_expand_clear (high[0]);
19553 if (count > half_width)
19554 emit_insn (gen_lshr3 (low[0], low[0],
19555 GEN_INT (count - half_width)));
19559 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19561 if (!rtx_equal_p (operands[0], operands[1]))
19562 emit_move_insn (operands[0], operands[1]);
19564 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19565 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19570 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19572 if (!rtx_equal_p (operands[0], operands[1]))
19573 emit_move_insn (operands[0], operands[1]);
19575 split_double_mode (mode, operands, 1, low, high);
19577 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19578 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19580 if (TARGET_CMOVE && scratch)
19582 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19583 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19585 ix86_expand_clear (scratch);
19586 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19591 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19592 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19594 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19599 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19601 predict_jump (int prob)
19603 rtx insn = get_last_insn ();
19604 gcc_assert (JUMP_P (insn));
19605 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19608 /* Helper function for the string operations below. Test VARIABLE for
19609 whether it is aligned to VALUE bytes. If true, jump to the label. */
19611 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19613 rtx label = gen_label_rtx ();
19614 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19615 if (GET_MODE (variable) == DImode)
19616 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19618 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19619 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19622 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19624 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19628 /* Adjust COUNTREG by VALUE. */
19630 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19632 rtx (*gen_add)(rtx, rtx, rtx)
19633 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19635 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19638 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
19640 ix86_zero_extend_to_Pmode (rtx exp)
19643 if (GET_MODE (exp) == VOIDmode)
19644 return force_reg (Pmode, exp);
19645 if (GET_MODE (exp) == Pmode)
19646 return copy_to_mode_reg (Pmode, exp);
19647 r = gen_reg_rtx (Pmode);
19648 emit_insn (gen_zero_extendsidi2 (r, exp));
19652 /* Divide COUNTREG by SCALE. */
19654 scale_counter (rtx countreg, int scale)
19660 if (CONST_INT_P (countreg))
19661 return GEN_INT (INTVAL (countreg) / scale);
19662 gcc_assert (REG_P (countreg));
19664 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19665 GEN_INT (exact_log2 (scale)),
19666 NULL, 1, OPTAB_DIRECT);
19670 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19671 DImode for constant loop counts. */
19673 static enum machine_mode
19674 counter_mode (rtx count_exp)
19676 if (GET_MODE (count_exp) != VOIDmode)
19677 return GET_MODE (count_exp);
19678 if (!CONST_INT_P (count_exp))
19680 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19685 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
19686 to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
19687 the overall size is COUNT, specified in bytes. When SRCPTR is NULL,
19688 output the equivalent loop to set memory by VALUE (supposed to be in MODE).
19690 The size is rounded down to a whole number of chunks moved at once.
19691 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
19695 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19696 rtx destptr, rtx srcptr, rtx value,
19697 rtx count, enum machine_mode mode, int unroll,
19700 rtx out_label, top_label, iter, tmp;
19701 enum machine_mode iter_mode = counter_mode (count);
19702 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19703 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19709 top_label = gen_label_rtx ();
19710 out_label = gen_label_rtx ();
19711 iter = gen_reg_rtx (iter_mode);
19713 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19714 NULL, 1, OPTAB_DIRECT);
19715 /* Those two should combine. */
19716 if (piece_size == const1_rtx)
19718 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19720 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19722 emit_move_insn (iter, const0_rtx);
19724 emit_label (top_label);
19726 tmp = convert_modes (Pmode, iter_mode, iter, true);
19727 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19728 destmem = change_address (destmem, mode, x_addr);
19732 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19733 srcmem = change_address (srcmem, mode, y_addr);
19735 /* When unrolling for chips that reorder memory reads and writes,
19736 we can save registers by using a single temporary.
19737 Also, using 4 temporaries is overkill in 32-bit mode. */
19738 if (!TARGET_64BIT && 0)
19740 for (i = 0; i < unroll; i++)
19745 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19747 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19749 emit_move_insn (destmem, srcmem);
19755 gcc_assert (unroll <= 4);
19756 for (i = 0; i < unroll; i++)
19758 tmpreg[i] = gen_reg_rtx (mode);
19762 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19764 emit_move_insn (tmpreg[i], srcmem);
19766 for (i = 0; i < unroll; i++)
19771 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19773 emit_move_insn (destmem, tmpreg[i]);
19778 for (i = 0; i < unroll; i++)
19782 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19783 emit_move_insn (destmem, value);
19786 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19787 true, OPTAB_LIB_WIDEN);
19789 emit_move_insn (iter, tmp);
19791 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19793 if (expected_size != -1)
19795 expected_size /= GET_MODE_SIZE (mode) * unroll;
19796 if (expected_size == 0)
19798 else if (expected_size > REG_BR_PROB_BASE)
19799 predict_jump (REG_BR_PROB_BASE - 1);
19801 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19804 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19805 iter = ix86_zero_extend_to_Pmode (iter);
19806 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19807 true, OPTAB_LIB_WIDEN);
19808 if (tmp != destptr)
19809 emit_move_insn (destptr, tmp);
19812 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19813 true, OPTAB_LIB_WIDEN);
19815 emit_move_insn (srcptr, tmp);
19817 emit_label (out_label);
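
/* Illustrative sketch (not part of the compiler): the shape of the
   loop emitted above, as plain C with a hypothetical chunk size.
   COUNT is first rounded down to a whole number of unrolled chunks;
   the remainder is handled by the epilogue expanders later in this
   file.  */
static void
copy_loop_sketch (char *dest, const char *src, unsigned long count)
{
  const unsigned long piece = 8 * 4;	/* mode size times unroll */
  unsigned long size = count & ~(piece - 1);
  unsigned long iter;

  for (iter = 0; iter < size; iter += piece)
    __builtin_memcpy (dest + iter, src + iter, piece);
  /* The dest/src pointers are then advanced by SIZE for the epilogue.  */
}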
19820 /* Output "rep; mov" instruction.
19821 Arguments have the same meaning as for the previous function. */
19823 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19824 rtx destptr, rtx srcptr,
19826 enum machine_mode mode)
19831 HOST_WIDE_INT rounded_count;
19833 /* If the size is known, it is shorter to use rep movs. */
19834 if (mode == QImode && CONST_INT_P (count)
19835 && !(INTVAL (count) & 3))
19838 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19839 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19840 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19841 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19842 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19843 if (mode != QImode)
19845 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19846 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19847 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19848 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19849 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19850 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19854 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19855 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19857 if (CONST_INT_P (count))
19859 rounded_count = (INTVAL (count)
19860 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19861 destmem = shallow_copy_rtx (destmem);
19862 srcmem = shallow_copy_rtx (srcmem);
19863 set_mem_size (destmem, rounded_count);
19864 set_mem_size (srcmem, rounded_count);
19868 if (MEM_SIZE_KNOWN_P (destmem))
19869 clear_mem_size (destmem);
19870 if (MEM_SIZE_KNOWN_P (srcmem))
19871 clear_mem_size (srcmem);
19873 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19877 /* Output "rep; stos" instruction.
19878 Arguments have the same meaning as for the previous function. */
19880 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19881 rtx count, enum machine_mode mode,
19886 HOST_WIDE_INT rounded_count;
19888 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19889 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19890 value = force_reg (mode, gen_lowpart (mode, value));
19891 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19892 if (mode != QImode)
19894 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19895 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19896 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19899 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19900 if (orig_value == const0_rtx && CONST_INT_P (count))
19902 rounded_count = (INTVAL (count)
19903 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19904 destmem = shallow_copy_rtx (destmem);
19905 set_mem_size (destmem, rounded_count);
19907 else if (MEM_SIZE_KNOWN_P (destmem))
19908 clear_mem_size (destmem);
19909 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19913 emit_strmov (rtx destmem, rtx srcmem,
19914 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19916 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19917 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19918 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19921 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19923 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19924 rtx destptr, rtx srcptr, rtx count, int max_size)
19927 if (CONST_INT_P (count))
19929 HOST_WIDE_INT countval = INTVAL (count);
19932 if ((countval & 0x10) && max_size > 16)
19936 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19937 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19940 gcc_unreachable ();
19943 if ((countval & 0x08) && max_size > 8)
19946 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19949 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19950 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19954 if ((countval & 0x04) && max_size > 4)
19956 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19959 if ((countval & 0x02) && max_size > 2)
19961 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19964 if ((countval & 0x01) && max_size > 1)
19966 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
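
/* Illustrative sketch (not part of the compiler): the constant-count
   epilogue above in plain C.  Each set bit of the residual count
   selects one fixed-size move, largest first, so at most one move is
   emitted per power of two below max_size (32-bit flavor shown).  */
static void
copy_tail_sketch (char *dest, const char *src, unsigned int countval)
{
  unsigned int offset = 0;

  if (countval & 8)
    {
      __builtin_memcpy (dest + offset, src + offset, 8);
      offset += 8;
    }
  if (countval & 4)
    {
      __builtin_memcpy (dest + offset, src + offset, 4);
      offset += 4;
    }
  if (countval & 2)
    {
      __builtin_memcpy (dest + offset, src + offset, 2);
      offset += 2;
    }
  if (countval & 1)
    __builtin_memcpy (dest + offset, src + offset, 1);
}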
19973 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19974 count, 1, OPTAB_DIRECT);
19975 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19976 count, QImode, 1, 4);
19980 /* When there are stringops, we can cheaply increase dest and src pointers.
19981 Otherwise we save code size by maintaining offset (zero is readily
19982 available from preceding rep operation) and using x86 addressing modes.
19984 if (TARGET_SINGLE_STRINGOP)
19988 rtx label = ix86_expand_aligntest (count, 4, true);
19989 src = change_address (srcmem, SImode, srcptr);
19990 dest = change_address (destmem, SImode, destptr);
19991 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19992 emit_label (label);
19993 LABEL_NUSES (label) = 1;
19997 rtx label = ix86_expand_aligntest (count, 2, true);
19998 src = change_address (srcmem, HImode, srcptr);
19999 dest = change_address (destmem, HImode, destptr);
20000 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20001 emit_label (label);
20002 LABEL_NUSES (label) = 1;
20006 rtx label = ix86_expand_aligntest (count, 1, true);
20007 src = change_address (srcmem, QImode, srcptr);
20008 dest = change_address (destmem, QImode, destptr);
20009 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20010 emit_label (label);
20011 LABEL_NUSES (label) = 1;
20016 rtx offset = force_reg (Pmode, const0_rtx);
20021 rtx label = ix86_expand_aligntest (count, 4, true);
20022 src = change_address (srcmem, SImode, srcptr);
20023 dest = change_address (destmem, SImode, destptr);
20024 emit_move_insn (dest, src);
20025 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20026 true, OPTAB_LIB_WIDEN);
20028 emit_move_insn (offset, tmp);
20029 emit_label (label);
20030 LABEL_NUSES (label) = 1;
20034 rtx label = ix86_expand_aligntest (count, 2, true);
20035 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20036 src = change_address (srcmem, HImode, tmp);
20037 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20038 dest = change_address (destmem, HImode, tmp);
20039 emit_move_insn (dest, src);
20040 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20041 true, OPTAB_LIB_WIDEN);
20043 emit_move_insn (offset, tmp);
20044 emit_label (label);
20045 LABEL_NUSES (label) = 1;
20049 rtx label = ix86_expand_aligntest (count, 1, true);
20050 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20051 src = change_address (srcmem, QImode, tmp);
20052 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20053 dest = change_address (destmem, QImode, tmp);
20054 emit_move_insn (dest, src);
20055 emit_label (label);
20056 LABEL_NUSES (label) = 1;
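/* Editorial sketch (not part of GCC): for a constant COUNT the jump tree
   emitted above behaves like this C tail copy (n < max_size); the setmem
   epilogue below follows the same pattern with stores instead of copies.
   Assumes memcpy from <string.h>.  */
#if 0
static void
copy_tail_model (char *d, const char *s, unsigned n)
{
  if (n & 8) { memcpy (d, s, 8); d += 8; s += 8; }
  if (n & 4) { memcpy (d, s, 4); d += 4; s += 4; }
  if (n & 2) { memcpy (d, s, 2); d += 2; s += 2; }
  if (n & 1) *d = *s;
}
#endif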
20061 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
20063 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20064 rtx count, int max_size)
20067 expand_simple_binop (counter_mode (count), AND, count,
20068 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20069 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20070 gen_lowpart (QImode, value), count, QImode,
20074 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
20076 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20080 if (CONST_INT_P (count))
20082 HOST_WIDE_INT countval = INTVAL (count);
20085 if ((countval & 0x10) && max_size > 16)
20089 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20090 emit_insn (gen_strset (destptr, dest, value));
20091 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20092 emit_insn (gen_strset (destptr, dest, value));
20095 gcc_unreachable ();
20098 if ((countval & 0x08) && max_size > 8)
20102 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20103 emit_insn (gen_strset (destptr, dest, value));
20107 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20108 emit_insn (gen_strset (destptr, dest, value));
20109 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20110 emit_insn (gen_strset (destptr, dest, value));
20114 if ((countval & 0x04) && max_size > 4)
20116 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20117 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20120 if ((countval & 0x02) && max_size > 2)
20122 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20123 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20126 if ((countval & 0x01) && max_size > 1)
20128 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20129 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20136 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20141 rtx label = ix86_expand_aligntest (count, 16, true);
20144 dest = change_address (destmem, DImode, destptr);
20145 emit_insn (gen_strset (destptr, dest, value));
20146 emit_insn (gen_strset (destptr, dest, value));
20150 dest = change_address (destmem, SImode, destptr);
20151 emit_insn (gen_strset (destptr, dest, value));
20152 emit_insn (gen_strset (destptr, dest, value));
20153 emit_insn (gen_strset (destptr, dest, value));
20154 emit_insn (gen_strset (destptr, dest, value));
20156 emit_label (label);
20157 LABEL_NUSES (label) = 1;
20161 rtx label = ix86_expand_aligntest (count, 8, true);
20164 dest = change_address (destmem, DImode, destptr);
20165 emit_insn (gen_strset (destptr, dest, value));
20169 dest = change_address (destmem, SImode, destptr);
20170 emit_insn (gen_strset (destptr, dest, value));
20171 emit_insn (gen_strset (destptr, dest, value));
20173 emit_label (label);
20174 LABEL_NUSES (label) = 1;
20178 rtx label = ix86_expand_aligntest (count, 4, true);
20179 dest = change_address (destmem, SImode, destptr);
20180 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20181 emit_label (label);
20182 LABEL_NUSES (label) = 1;
20186 rtx label = ix86_expand_aligntest (count, 2, true);
20187 dest = change_address (destmem, HImode, destptr);
20188 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20189 emit_label (label);
20190 LABEL_NUSES (label) = 1;
20194 rtx label = ix86_expand_aligntest (count, 1, true);
20195 dest = change_address (destmem, QImode, destptr);
20196 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20197 emit_label (label);
20198 LABEL_NUSES (label) = 1;
20202 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
20203 to DESIRED_ALIGNMENT.  */
20205 expand_movmem_prologue (rtx destmem, rtx srcmem,
20206 rtx destptr, rtx srcptr, rtx count,
20207 int align, int desired_alignment)
20209 if (align <= 1 && desired_alignment > 1)
20211 rtx label = ix86_expand_aligntest (destptr, 1, false);
20212 srcmem = change_address (srcmem, QImode, srcptr);
20213 destmem = change_address (destmem, QImode, destptr);
20214 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20215 ix86_adjust_counter (count, 1);
20216 emit_label (label);
20217 LABEL_NUSES (label) = 1;
20219 if (align <= 2 && desired_alignment > 2)
20221 rtx label = ix86_expand_aligntest (destptr, 2, false);
20222 srcmem = change_address (srcmem, HImode, srcptr);
20223 destmem = change_address (destmem, HImode, destptr);
20224 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20225 ix86_adjust_counter (count, 2);
20226 emit_label (label);
20227 LABEL_NUSES (label) = 1;
20229 if (align <= 4 && desired_alignment > 4)
20231 rtx label = ix86_expand_aligntest (destptr, 4, false);
20232 srcmem = change_address (srcmem, SImode, srcptr);
20233 destmem = change_address (destmem, SImode, destptr);
20234 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20235 ix86_adjust_counter (count, 4);
20236 emit_label (label);
20237 LABEL_NUSES (label) = 1;
20239 gcc_assert (desired_alignment <= 8);
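/* Editorial sketch (not part of GCC): the alignment prologue above in plain
   C.  One small copy per doubling of alignment brings DEST up to the desired
   alignment; COUNT is reduced accordingly, which is why the caller must
   guarantee the block is large enough.  Assumes <stdint.h> and <string.h>.  */
#if 0
static void
align_dest_model (char **d, const char **s, unsigned long *n, int desired)
{
  if (((uintptr_t) *d & 1) && desired > 1)
    { **d = **s; (*d)++; (*s)++; (*n)--; }
  if (((uintptr_t) *d & 2) && desired > 2)
    { memcpy (*d, *s, 2); *d += 2; *s += 2; *n -= 2; }
  if (((uintptr_t) *d & 4) && desired > 4)
    { memcpy (*d, *s, 4); *d += 4; *s += 4; *n -= 4; }
}
#endif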
20242 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
20243 ALIGN_BYTES is how many bytes need to be copied.  */
20245 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20246 int desired_align, int align_bytes)
20249 rtx orig_dst = dst;
20250 rtx orig_src = src;
20252 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20253 if (src_align_bytes >= 0)
20254 src_align_bytes = desired_align - src_align_bytes;
20255 if (align_bytes & 1)
20257 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20258 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20260 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20262 if (align_bytes & 2)
20264 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20265 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20266 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20267 set_mem_align (dst, 2 * BITS_PER_UNIT);
20268 if (src_align_bytes >= 0
20269 && (src_align_bytes & 1) == (align_bytes & 1)
20270 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20271 set_mem_align (src, 2 * BITS_PER_UNIT);
20273 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20275 if (align_bytes & 4)
20277 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20278 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20279 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20280 set_mem_align (dst, 4 * BITS_PER_UNIT);
20281 if (src_align_bytes >= 0)
20283 unsigned int src_align = 0;
20284 if ((src_align_bytes & 3) == (align_bytes & 3))
20286 else if ((src_align_bytes & 1) == (align_bytes & 1))
20288 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20289 set_mem_align (src, src_align * BITS_PER_UNIT);
20292 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20294 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20295 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20296 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20297 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20298 if (src_align_bytes >= 0)
20300 unsigned int src_align = 0;
20301 if ((src_align_bytes & 7) == (align_bytes & 7))
20303 else if ((src_align_bytes & 3) == (align_bytes & 3))
20305 else if ((src_align_bytes & 1) == (align_bytes & 1))
20307 if (src_align > (unsigned int) desired_align)
20308 src_align = desired_align;
20309 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20310 set_mem_align (src, src_align * BITS_PER_UNIT);
20312 if (MEM_SIZE_KNOWN_P (orig_dst))
20313 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20314 if (MEM_SIZE_KNOWN_P (orig_src))
20315 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
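/* Editorial example: with desired_align == 8 and align_bytes == 5, the code
   above emits a QImode and then an SImode copy (1 + 4 bytes), after which
   DST is 8-byte aligned.  SRC's recorded alignment may be raised only when
   its own misalignment (src_align_bytes) shares the relevant low bits with
   align_bytes, which is what the (src_align_bytes & N) == (align_bytes & N)
   tests check.  */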
20320 /* Store enough into DEST to align DEST, known to be aligned by ALIGN, to
20321 DESIRED_ALIGNMENT.  */
20323 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20324 int align, int desired_alignment)
20326 if (align <= 1 && desired_alignment > 1)
20328 rtx label = ix86_expand_aligntest (destptr, 1, false);
20329 destmem = change_address (destmem, QImode, destptr);
20330 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20331 ix86_adjust_counter (count, 1);
20332 emit_label (label);
20333 LABEL_NUSES (label) = 1;
20335 if (align <= 2 && desired_alignment > 2)
20337 rtx label = ix86_expand_aligntest (destptr, 2, false);
20338 destmem = change_address (destmem, HImode, destptr);
20339 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20340 ix86_adjust_counter (count, 2);
20341 emit_label (label);
20342 LABEL_NUSES (label) = 1;
20344 if (align <= 4 && desired_alignment > 4)
20346 rtx label = ix86_expand_aligntest (destptr, 4, false);
20347 destmem = change_address (destmem, SImode, destptr);
20348 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20349 ix86_adjust_counter (count, 4);
20350 emit_label (label);
20351 LABEL_NUSES (label) = 1;
20353 gcc_assert (desired_alignment <= 8);
20356 /* Store enough into DST to align DST, known to be aligned by ALIGN, to
20357 DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
20359 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20360 int desired_align, int align_bytes)
20363 rtx orig_dst = dst;
20364 if (align_bytes & 1)
20366 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20368 emit_insn (gen_strset (destreg, dst,
20369 gen_lowpart (QImode, value)));
20371 if (align_bytes & 2)
20373 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20374 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20375 set_mem_align (dst, 2 * BITS_PER_UNIT);
20377 emit_insn (gen_strset (destreg, dst,
20378 gen_lowpart (HImode, value)));
20380 if (align_bytes & 4)
20382 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20383 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20384 set_mem_align (dst, 4 * BITS_PER_UNIT);
20386 emit_insn (gen_strset (destreg, dst,
20387 gen_lowpart (SImode, value)));
20389 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20390 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20391 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20392 if (MEM_SIZE_KNOWN_P (orig_dst))
20393 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20397 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20398 static enum stringop_alg
20399 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20400 int *dynamic_check)
20402 const struct stringop_algs * algs;
20403 bool optimize_for_speed;
20404 /* Algorithms using the rep prefix want at least edi and ecx;
20405 additionally, memset wants eax and memcpy wants esi. Don't
20406 consider such algorithms if the user has appropriated those
20407 registers for their own purposes. */
20408 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20410 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20412 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20413 || (alg != rep_prefix_1_byte \
20414 && alg != rep_prefix_4_byte \
20415 && alg != rep_prefix_8_byte))
20416 const struct processor_costs *cost;
20418 /* Even if the string operation call is cold, we still might spend a lot
20419 of time processing large blocks. */
20420 if (optimize_function_for_size_p (cfun)
20421 || (optimize_insn_for_size_p ()
20422 && expected_size != -1 && expected_size < 256))
20423 optimize_for_speed = false;
20425 optimize_for_speed = true;
20427 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20429 *dynamic_check = -1;
20431 algs = &cost->memset[TARGET_64BIT != 0];
20433 algs = &cost->memcpy[TARGET_64BIT != 0];
20434 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
20435 return ix86_stringop_alg;
20436 /* rep; movq or rep; movl is the smallest variant. */
20437 else if (!optimize_for_speed)
20439 if (!count || (count & 3))
20440 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20442 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20444 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.  */
20446 else if (expected_size != -1 && expected_size < 4)
20447 return loop_1_byte;
20448 else if (expected_size != -1)
20451 enum stringop_alg alg = libcall;
20452 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20454 /* We get here if the algorithms that were not libcall-based
20455 were rep-prefix based and we are unable to use rep prefixes
20456 based on global register usage. Break out of the loop and
20457 use the heuristic below. */
20458 if (algs->size[i].max == 0)
20460 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20462 enum stringop_alg candidate = algs->size[i].alg;
20464 if (candidate != libcall && ALG_USABLE_P (candidate))
20466 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20467 last non-libcall inline algorithm. */
20468 if (TARGET_INLINE_ALL_STRINGOPS)
20470 /* When the current size is best copied by a libcall,
20471 but we are still forced to inline, run the heuristic below
20472 that will pick code for medium-sized blocks.  */
20473 if (alg != libcall)
20477 else if (ALG_USABLE_P (candidate))
20481 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20483 /* When asked to inline the call anyway, try to pick a meaningful choice.
20484 We look for the maximal size of a block that is faster to copy by hand and
20485 take blocks of at most that size, guessing that the average size will
20486 be roughly half of the block.
20488 If this turns out to be bad, we might simply specify the preferred
20489 choice in ix86_costs.  */
20490 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20491 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20494 enum stringop_alg alg;
20496 bool any_alg_usable_p = true;
20498 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20500 enum stringop_alg candidate = algs->size[i].alg;
20501 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20503 if (candidate != libcall && candidate
20504 && ALG_USABLE_P (candidate))
20505 max = algs->size[i].max;
20507 /* If there aren't any usable algorithms, then recursing on
20508 smaller sizes isn't going to find anything. Just return the
20509 simple byte-at-a-time copy loop. */
20510 if (!any_alg_usable_p)
20512 /* Pick something reasonable. */
20513 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20514 *dynamic_check = 128;
20515 return loop_1_byte;
20519 alg = decide_alg (count, max / 2, memset, dynamic_check);
20520 gcc_assert (*dynamic_check == -1);
20521 gcc_assert (alg != libcall);
20522 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20523 *dynamic_check = max;
20526 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20527 #undef ALG_USABLE_P
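/* Editorial example (hypothetical cost table): given size entries
   {{24, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}} and
   expected_size == 1000, the scan above stops at the 8192 entry and returns
   rep_prefix_4_byte; with expected_size == 100000 the libcall entry is
   reached and, unless inlining is forced, libcall is returned.  */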
20530 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20531 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20533 decide_alignment (int align,
20534 enum stringop_alg alg,
20537 int desired_align = 0;
20541 gcc_unreachable ();
20543 case unrolled_loop:
20544 desired_align = GET_MODE_SIZE (Pmode);
20546 case rep_prefix_8_byte:
20549 case rep_prefix_4_byte:
20550 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20551 copying a whole cache line at once.  */
20552 if (TARGET_PENTIUMPRO)
20557 case rep_prefix_1_byte:
20558 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20559 copying a whole cache line at once.  */
20560 if (TARGET_PENTIUMPRO)
20574 if (desired_align < align)
20575 desired_align = align;
20576 if (expected_size != -1 && expected_size < 4)
20577 desired_align = align;
20578 return desired_align;
20581 /* Return the smallest power of 2 greater than VAL. */
20583 smallest_pow2_greater_than (int val)
20591 /* Expand string move (memcpy) operation. Use i386 string operations
20592 when profitable. expand_setmem contains similar code. The code
20593 depends upon architecture, block size and alignment, but always has
20594 the same overall structure:
20596 1) Prologue guard: Conditional that jumps up to epilogues for small
20597 blocks that can be handled by epilogue alone. This is faster
20598 but also needed for correctness, since the prologue assumes the block
20599 is larger than the desired alignment.
20601 Optional dynamic check for size and libcall for large
20602 blocks is emitted here too, with -minline-stringops-dynamically.
20604 2) Prologue: copy first few bytes in order to get destination
20605 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
20606 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
20607 copied. We emit either a jump tree on power of two sized
20608 blocks, or a byte loop.
20610 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20611 with specified algorithm.
20613 4) Epilogue: code copying tail of the block that is too small to be
20614 handled by main body (or up to size guarded by prologue guard). */
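/* Editorial sketch of the emitted shape (illustrative, not literal RTL):

     if (count < epilogue_size_needed) goto epilogue;         // step 1
     while (dest & (desired_align - 1)) copy byte, count--;   // step 2
     copy size_needed-byte chunks (loop or rep prefix);       // step 3
   epilogue:
     copy count & (epilogue_size_needed - 1) tail bytes;      // step 4  */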
20617 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20618 rtx expected_align_exp, rtx expected_size_exp)
20624 rtx jump_around_label = NULL;
20625 HOST_WIDE_INT align = 1;
20626 unsigned HOST_WIDE_INT count = 0;
20627 HOST_WIDE_INT expected_size = -1;
20628 int size_needed = 0, epilogue_size_needed;
20629 int desired_align = 0, align_bytes = 0;
20630 enum stringop_alg alg;
20632 bool need_zero_guard = false;
20634 if (CONST_INT_P (align_exp))
20635 align = INTVAL (align_exp);
20636 /* i386 can do misaligned access at a reasonably increased cost.  */
20637 if (CONST_INT_P (expected_align_exp)
20638 && INTVAL (expected_align_exp) > align)
20639 align = INTVAL (expected_align_exp);
20640 /* ALIGN is the minimum of destination and source alignment, but we care here
20641 just about destination alignment. */
20642 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20643 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20645 if (CONST_INT_P (count_exp))
20646 count = expected_size = INTVAL (count_exp);
20647 if (CONST_INT_P (expected_size_exp) && count == 0)
20648 expected_size = INTVAL (expected_size_exp);
20650 /* Make sure we don't need to care about overflow later on. */
20651 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20654 /* Step 0: Decide on preferred algorithm, desired alignment and
20655 size of chunks to be copied by main loop. */
20657 alg = decide_alg (count, expected_size, false, &dynamic_check);
20658 desired_align = decide_alignment (align, alg, expected_size);
20660 if (!TARGET_ALIGN_STRINGOPS)
20661 align = desired_align;
20663 if (alg == libcall)
20665 gcc_assert (alg != no_stringop);
20667 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20668 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20669 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20674 gcc_unreachable ();
20676 need_zero_guard = true;
20677 size_needed = GET_MODE_SIZE (Pmode);
20679 case unrolled_loop:
20680 need_zero_guard = true;
20681 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20683 case rep_prefix_8_byte:
20686 case rep_prefix_4_byte:
20689 case rep_prefix_1_byte:
20693 need_zero_guard = true;
20698 epilogue_size_needed = size_needed;
20700 /* Step 1: Prologue guard. */
20702 /* Alignment code needs count to be in register. */
20703 if (CONST_INT_P (count_exp) && desired_align > align)
20705 if (INTVAL (count_exp) > desired_align
20706 && INTVAL (count_exp) > size_needed)
20709 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20710 if (align_bytes <= 0)
20713 align_bytes = desired_align - align_bytes;
20715 if (align_bytes == 0)
20716 count_exp = force_reg (counter_mode (count_exp), count_exp);
20718 gcc_assert (desired_align >= 1 && align >= 1);
20720 /* Ensure that alignment prologue won't copy past end of block. */
20721 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20723 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20724 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20725 Make sure it is a power of 2.  */
20726 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20730 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20732 /* If main algorithm works on QImode, no epilogue is needed.
20733 For small sizes just don't align anything. */
20734 if (size_needed == 1)
20735 desired_align = align;
20742 label = gen_label_rtx ();
20743 emit_cmp_and_jump_insns (count_exp,
20744 GEN_INT (epilogue_size_needed),
20745 LTU, 0, counter_mode (count_exp), 1, label);
20746 if (expected_size == -1 || expected_size < epilogue_size_needed)
20747 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20749 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20753 /* Emit code to decide at runtime whether a library call or inline code should be used.  */
20755 if (dynamic_check != -1)
20757 if (CONST_INT_P (count_exp))
20759 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20761 emit_block_move_via_libcall (dst, src, count_exp, false);
20762 count_exp = const0_rtx;
20768 rtx hot_label = gen_label_rtx ();
20769 jump_around_label = gen_label_rtx ();
20770 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20771 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20772 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20773 emit_block_move_via_libcall (dst, src, count_exp, false);
20774 emit_jump (jump_around_label);
20775 emit_label (hot_label);
20779 /* Step 2: Alignment prologue. */
20781 if (desired_align > align)
20783 if (align_bytes == 0)
20785 /* Except for the first move in the epilogue, we no longer know the
20786 constant offset in aliasing info.  It doesn't seem worth the
20787 pain to maintain it for the first move, so throw away the info
20788 early.  */
20789 src = change_address (src, BLKmode, srcreg);
20790 dst = change_address (dst, BLKmode, destreg);
20791 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20796 /* If we know how many bytes need to be stored before dst is
20797 sufficiently aligned, maintain aliasing info accurately. */
20798 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20799 desired_align, align_bytes);
20800 count_exp = plus_constant (count_exp, -align_bytes);
20801 count -= align_bytes;
20803 if (need_zero_guard
20804 && (count < (unsigned HOST_WIDE_INT) size_needed
20805 || (align_bytes == 0
20806 && count < ((unsigned HOST_WIDE_INT) size_needed
20807 + desired_align - align))))
20809 /* It is possible that we copied enough so that the main loop will not
20810 execute.  */
20811 gcc_assert (size_needed > 1);
20812 if (label == NULL_RTX)
20813 label = gen_label_rtx ();
20814 emit_cmp_and_jump_insns (count_exp,
20815 GEN_INT (size_needed),
20816 LTU, 0, counter_mode (count_exp), 1, label);
20817 if (expected_size == -1
20818 || expected_size < (desired_align - align) / 2 + size_needed)
20819 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20821 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20824 if (label && size_needed == 1)
20826 emit_label (label);
20827 LABEL_NUSES (label) = 1;
20829 epilogue_size_needed = 1;
20831 else if (label == NULL_RTX)
20832 epilogue_size_needed = size_needed;
20834 /* Step 3: Main loop. */
20840 gcc_unreachable ();
20842 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20843 count_exp, QImode, 1, expected_size);
20846 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20847 count_exp, Pmode, 1, expected_size);
20849 case unrolled_loop:
20850 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
20851 registers for 4 temporaries anyway. */
20852 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20853 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20856 case rep_prefix_8_byte:
20857 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20860 case rep_prefix_4_byte:
20861 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20864 case rep_prefix_1_byte:
20865 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20869 /* Properly adjust the offsets of src and dest memory for aliasing.  */
20870 if (CONST_INT_P (count_exp))
20872 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20873 (count / size_needed) * size_needed);
20874 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20875 (count / size_needed) * size_needed);
20879 src = change_address (src, BLKmode, srcreg);
20880 dst = change_address (dst, BLKmode, destreg);
20883 /* Step 4: Epilogue to copy the remaining bytes. */
20887 /* When the main loop is done, COUNT_EXP might hold the original count,
20888 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20889 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20890 bytes.  Compensate if needed.  */
20892 if (size_needed < epilogue_size_needed)
20895 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20896 GEN_INT (size_needed - 1), count_exp, 1,
20898 if (tmp != count_exp)
20899 emit_move_insn (count_exp, tmp);
20901 emit_label (label);
20902 LABEL_NUSES (label) = 1;
20905 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20906 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20907 epilogue_size_needed);
20908 if (jump_around_label)
20909 emit_label (jump_around_label);
20913 /* Helper function for memset.  For a QImode value 0xXY produce
20914 0xXYXYXYXY of the width specified by MODE.  This is essentially
20915 a * 0x10101010, but we can do slightly better than
20916 synth_mult by unwinding the sequence by hand on CPUs with
20917 slow multiply.  */
20919 promote_duplicated_reg (enum machine_mode mode, rtx val)
20921 enum machine_mode valmode = GET_MODE (val);
20923 int nops = mode == DImode ? 3 : 2;
20925 gcc_assert (mode == SImode || mode == DImode);
20926 if (val == const0_rtx)
20927 return copy_to_mode_reg (mode, const0_rtx);
20928 if (CONST_INT_P (val))
20930 HOST_WIDE_INT v = INTVAL (val) & 255;
20932 v |= v << 8;
20933 v |= v << 16;
20934 if (mode == DImode)
20935 v |= (v << 16) << 16;
20936 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20939 if (valmode == VOIDmode)
20941 if (valmode != QImode)
20942 val = gen_lowpart (QImode, val);
20943 if (mode == QImode)
20944 return val;
20945 if (!TARGET_PARTIAL_REG_STALL)
20946 nops--;
20947 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20948 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20949 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20950 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20952 rtx reg = convert_modes (mode, QImode, val, true);
20953 tmp = promote_duplicated_reg (mode, const1_rtx);
20954 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20959 rtx reg = convert_modes (mode, QImode, val, true);
20961 if (!TARGET_PARTIAL_REG_STALL)
20962 if (mode == SImode)
20963 emit_insn (gen_movsi_insv_1 (reg, reg));
20965 emit_insn (gen_movdi_insv_1 (reg, reg));
20968 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20969 NULL, 1, OPTAB_DIRECT);
20971 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20973 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20974 NULL, 1, OPTAB_DIRECT);
20975 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20976 if (mode == SImode)
20977 return reg;
20978 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20979 NULL, 1, OPTAB_DIRECT);
20980 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
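/* Editorial sketch (not part of GCC): the shift/or duplication emitted
   above, as plain C for the SImode case.  */
#if 0
static unsigned int
dup_byte_si_model (unsigned char b)
{
  unsigned int v = b;
  v |= v << 8;			/* 0x000000XY -> 0x0000XYXY */
  v |= v << 16;			/* 0x0000XYXY -> 0xXYXYXYXY */
  return v;
}
#endif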
20985 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
20986 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
20987 alignment from ALIGN to DESIRED_ALIGN.  */
20989 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20994 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20995 promoted_val = promote_duplicated_reg (DImode, val);
20996 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20997 promoted_val = promote_duplicated_reg (SImode, val);
20998 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20999 promoted_val = promote_duplicated_reg (HImode, val);
21001 promoted_val = val;
21003 return promoted_val;
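/* Editorial example: on 64-bit targets, size_needed == 8 selects a DImode
   promotion (a full 0xXYXY...XY register); size_needed == 4 with
   desired_align <= 4 needs only SImode; a pure byte loop keeps VAL
   unpromoted.  */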
21006 /* Expand string clear operation (bzero). Use i386 string operations when
21007 profitable. See expand_movmem comment for explanation of individual
21008 steps performed. */
21010 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21011 rtx expected_align_exp, rtx expected_size_exp)
21016 rtx jump_around_label = NULL;
21017 HOST_WIDE_INT align = 1;
21018 unsigned HOST_WIDE_INT count = 0;
21019 HOST_WIDE_INT expected_size = -1;
21020 int size_needed = 0, epilogue_size_needed;
21021 int desired_align = 0, align_bytes = 0;
21022 enum stringop_alg alg;
21023 rtx promoted_val = NULL;
21024 bool force_loopy_epilogue = false;
21026 bool need_zero_guard = false;
21028 if (CONST_INT_P (align_exp))
21029 align = INTVAL (align_exp);
21030 /* i386 can do misaligned access at a reasonably increased cost.  */
21031 if (CONST_INT_P (expected_align_exp)
21032 && INTVAL (expected_align_exp) > align)
21033 align = INTVAL (expected_align_exp);
21034 if (CONST_INT_P (count_exp))
21035 count = expected_size = INTVAL (count_exp);
21036 if (CONST_INT_P (expected_size_exp) && count == 0)
21037 expected_size = INTVAL (expected_size_exp);
21039 /* Make sure we don't need to care about overflow later on. */
21040 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21043 /* Step 0: Decide on preferred algorithm, desired alignment and
21044 size of chunks to be copied by main loop. */
21046 alg = decide_alg (count, expected_size, true, &dynamic_check);
21047 desired_align = decide_alignment (align, alg, expected_size);
21049 if (!TARGET_ALIGN_STRINGOPS)
21050 align = desired_align;
21052 if (alg == libcall)
21054 gcc_assert (alg != no_stringop);
21056 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21057 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21062 gcc_unreachable ();
21064 need_zero_guard = true;
21065 size_needed = GET_MODE_SIZE (Pmode);
21067 case unrolled_loop:
21068 need_zero_guard = true;
21069 size_needed = GET_MODE_SIZE (Pmode) * 4;
21071 case rep_prefix_8_byte:
21074 case rep_prefix_4_byte:
21077 case rep_prefix_1_byte:
21081 need_zero_guard = true;
21085 epilogue_size_needed = size_needed;
21087 /* Step 1: Prologue guard. */
21089 /* Alignment code needs count to be in register. */
21090 if (CONST_INT_P (count_exp) && desired_align > align)
21092 if (INTVAL (count_exp) > desired_align
21093 && INTVAL (count_exp) > size_needed)
21096 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21097 if (align_bytes <= 0)
21100 align_bytes = desired_align - align_bytes;
21102 if (align_bytes == 0)
21104 enum machine_mode mode = SImode;
21105 if (TARGET_64BIT && (count & ~0xffffffff))
21106 mode = DImode;
21107 count_exp = force_reg (mode, count_exp);
21110 /* Do the cheap promotion to allow better CSE across the
21111 main loop and epilogue (i.e. one load of the big constant in
21112 front of all code).  */
21113 if (CONST_INT_P (val_exp))
21114 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21115 desired_align, align);
21116 /* Ensure that alignment prologue won't copy past end of block. */
21117 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21119 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21120 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21121 Make sure it is power of 2. */
21122 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21124 /* To improve performance of small blocks, we jump around the VAL
21125 promoting code.  This means that if the promoted VAL is not constant,
21126 we might not use it in the epilogue and have to use the byte
21127 loop variant.  */
21128 if (epilogue_size_needed > 2 && !promoted_val)
21129 force_loopy_epilogue = true;
21132 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21134 /* If main algorithm works on QImode, no epilogue is needed.
21135 For small sizes just don't align anything. */
21136 if (size_needed == 1)
21137 desired_align = align;
21144 label = gen_label_rtx ();
21145 emit_cmp_and_jump_insns (count_exp,
21146 GEN_INT (epilogue_size_needed),
21147 LTU, 0, counter_mode (count_exp), 1, label);
21148 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21149 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21151 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21154 if (dynamic_check != -1)
21156 rtx hot_label = gen_label_rtx ();
21157 jump_around_label = gen_label_rtx ();
21158 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21159 LEU, 0, counter_mode (count_exp), 1, hot_label);
21160 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21161 set_storage_via_libcall (dst, count_exp, val_exp, false);
21162 emit_jump (jump_around_label);
21163 emit_label (hot_label);
21166 /* Step 2: Alignment prologue. */
21168 /* Do the expensive promotion once we branched off the small blocks. */
21170 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21171 desired_align, align);
21172 gcc_assert (desired_align >= 1 && align >= 1);
21174 if (desired_align > align)
21176 if (align_bytes == 0)
21178 /* Except for the first move in the epilogue, we no longer know the
21179 constant offset in aliasing info.  It doesn't seem worth the
21180 pain to maintain it for the first move, so throw away the info
21181 early.  */
21182 dst = change_address (dst, BLKmode, destreg);
21183 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21188 /* If we know how many bytes need to be stored before dst is
21189 sufficiently aligned, maintain aliasing info accurately. */
21190 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21191 desired_align, align_bytes);
21192 count_exp = plus_constant (count_exp, -align_bytes);
21193 count -= align_bytes;
21195 if (need_zero_guard
21196 && (count < (unsigned HOST_WIDE_INT) size_needed
21197 || (align_bytes == 0
21198 && count < ((unsigned HOST_WIDE_INT) size_needed
21199 + desired_align - align))))
21201 /* It is possible that we copied enough so that the main loop will not
21202 execute.  */
21203 gcc_assert (size_needed > 1);
21204 if (label == NULL_RTX)
21205 label = gen_label_rtx ();
21206 emit_cmp_and_jump_insns (count_exp,
21207 GEN_INT (size_needed),
21208 LTU, 0, counter_mode (count_exp), 1, label);
21209 if (expected_size == -1
21210 || expected_size < (desired_align - align) / 2 + size_needed)
21211 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21213 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21216 if (label && size_needed == 1)
21218 emit_label (label);
21219 LABEL_NUSES (label) = 1;
21221 promoted_val = val_exp;
21222 epilogue_size_needed = 1;
21224 else if (label == NULL_RTX)
21225 epilogue_size_needed = size_needed;
21227 /* Step 3: Main loop. */
21233 gcc_unreachable ();
21235 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21236 count_exp, QImode, 1, expected_size);
21239 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21240 count_exp, Pmode, 1, expected_size);
21242 case unrolled_loop:
21243 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21244 count_exp, Pmode, 4, expected_size);
21246 case rep_prefix_8_byte:
21247 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21250 case rep_prefix_4_byte:
21251 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21254 case rep_prefix_1_byte:
21255 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21259 /* Properly adjust the offset of dst memory for aliasing.  */
21260 if (CONST_INT_P (count_exp))
21261 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21262 (count / size_needed) * size_needed);
21264 dst = change_address (dst, BLKmode, destreg);
21266 /* Step 4: Epilogue to copy the remaining bytes. */
21270 /* When the main loop is done, COUNT_EXP might hold the original count,
21271 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21272 Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21273 bytes.  Compensate if needed.  */
21275 if (size_needed < epilogue_size_needed)
21278 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21279 GEN_INT (size_needed - 1), count_exp, 1,
21281 if (tmp != count_exp)
21282 emit_move_insn (count_exp, tmp);
21284 emit_label (label);
21285 LABEL_NUSES (label) = 1;
21288 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21290 if (force_loopy_epilogue)
21291 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21292 epilogue_size_needed);
21294 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21295 epilogue_size_needed);
21297 if (jump_around_label)
21298 emit_label (jump_around_label);
21302 /* Expand the appropriate insns for doing strlen if not just doing
21303 repnz; scasb
21305 out = result, initialized with the start address
21306 align_rtx = alignment of the address.
21307 scratch = scratch register, initialized with the start address when
21308 not aligned, otherwise undefined
21310 This is just the body. It needs the initializations mentioned above and
21311 some address computing at the end. These things are done in i386.md. */
21314 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21318 rtx align_2_label = NULL_RTX;
21319 rtx align_3_label = NULL_RTX;
21320 rtx align_4_label = gen_label_rtx ();
21321 rtx end_0_label = gen_label_rtx ();
21323 rtx tmpreg = gen_reg_rtx (SImode);
21324 rtx scratch = gen_reg_rtx (SImode);
21328 if (CONST_INT_P (align_rtx))
21329 align = INTVAL (align_rtx);
21331 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21333 /* Is there a known alignment and is it less than 4? */
21336 rtx scratch1 = gen_reg_rtx (Pmode);
21337 emit_move_insn (scratch1, out);
21338 /* Is there a known alignment and is it not 2? */
21341 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21342 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21344 /* Leave just the 3 lower bits. */
21345 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21346 NULL_RTX, 0, OPTAB_WIDEN);
21348 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21349 Pmode, 1, align_4_label);
21350 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21351 Pmode, 1, align_2_label);
21352 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21353 Pmode, 1, align_3_label);
21357 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21358 check whether it is aligned to a 4-byte boundary.  */
21360 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21361 NULL_RTX, 0, OPTAB_WIDEN);
21363 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21364 Pmode, 1, align_4_label);
21367 mem = change_address (src, QImode, out);
21369 /* Now compare the bytes. */
21371 /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
21372 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21373 QImode, 1, end_0_label);
21375 /* Increment the address. */
21376 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21378 /* Not needed with an alignment of 2 */
21381 emit_label (align_2_label);
21383 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21386 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21388 emit_label (align_3_label);
21391 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21394 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21397 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
21398 align this loop.  It gives only huge programs but does not help to
21399 converge.  */
21400 emit_label (align_4_label);
21402 mem = change_address (src, SImode, out);
21403 emit_move_insn (scratch, mem);
21404 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21406 /* This formula yields a nonzero result iff one of the bytes is zero.
21407 This saves three branches inside the loop and many cycles.  */
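/* Editorial note: in C the test built below is the well-known
   (x - 0x01010101) & ~x & 0x80808080, which is nonzero iff some byte of x
   is zero, so a single compare covers four bytes.  */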
21409 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21410 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21411 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21412 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21413 gen_int_mode (0x80808080, SImode)));
21414 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21419 rtx reg = gen_reg_rtx (SImode);
21420 rtx reg2 = gen_reg_rtx (Pmode);
21421 emit_move_insn (reg, tmpreg);
21422 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21424 /* If zero is not in the first two bytes, move two bytes forward. */
21425 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21426 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21427 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21428 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21429 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21432 /* Emit lea manually to avoid clobbering of flags. */
21433 emit_insn (gen_rtx_SET (SImode, reg2,
21434 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21436 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21437 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21438 emit_insn (gen_rtx_SET (VOIDmode, out,
21439 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21445 rtx end_2_label = gen_label_rtx ();
21446 /* Is zero in the first two bytes? */
21448 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21449 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21450 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21451 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21452 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21454 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21455 JUMP_LABEL (tmp) = end_2_label;
21457 /* Not in the first two. Move two bytes forward. */
21458 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21459 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21461 emit_label (end_2_label);
21465 /* Avoid branch in fixing the byte. */
21466 tmpreg = gen_lowpart (QImode, tmpreg);
21467 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21468 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21469 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21470 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21472 emit_label (end_0_label);
21475 /* Expand strlen. */
21478 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21480 rtx addr, scratch1, scratch2, scratch3, scratch4;
21482 /* The generic case of the strlen expander is long.  Avoid its
21483 expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
21485 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21486 && !TARGET_INLINE_ALL_STRINGOPS
21487 && !optimize_insn_for_size_p ()
21488 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21491 addr = force_reg (Pmode, XEXP (src, 0));
21492 scratch1 = gen_reg_rtx (Pmode);
21494 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21495 && !optimize_insn_for_size_p ())
21497 /* Well, it seems that some optimizer does not combine a call like
21498 foo (strlen (bar), strlen (bar));
21499 when the move and the subtraction are done here.  It does calculate
21500 the length just once when these instructions are done inside of
21501 output_strlen_unroll ().  But I think since &bar[strlen (bar)] is
21502 often used and I use one fewer register for the lifetime of
21503 output_strlen_unroll () this is better.  */
21505 emit_move_insn (out, addr);
21507 ix86_expand_strlensi_unroll_1 (out, src, align);
21509 /* strlensi_unroll_1 returns the address of the zero at the end of
21510 the string, like memchr(), so compute the length by subtracting
21511 the start address. */
21512 emit_insn (ix86_gen_sub3 (out, out, addr));
21518 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21519 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21522 scratch2 = gen_reg_rtx (Pmode);
21523 scratch3 = gen_reg_rtx (Pmode);
21524 scratch4 = force_reg (Pmode, constm1_rtx);
21526 emit_move_insn (scratch3, addr);
21527 eoschar = force_reg (QImode, eoschar);
21529 src = replace_equiv_address_nv (src, scratch3);
21531 /* If .md starts supporting :P, this can be done in .md. */
21532 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21533 scratch4), UNSPEC_SCAS);
21534 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21535 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21536 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
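/* Editorial note: the count register starts at -1, and "repnz scasb"
   decrements it once per byte scanned including the terminator, leaving
   -(len + 2).  Hence len = ~count - 1, which is exactly the
   one's-complement and add of -1 emitted above.  */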
21541 /* For a given symbol (function), construct code to compute the address of its PLT
21542 entry in the large x86-64 PIC model.  */
21544 construct_plt_address (rtx symbol)
21546 rtx tmp = gen_reg_rtx (Pmode);
21547 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21549 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21550 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21552 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21553 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21558 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21560 rtx pop, bool sibcall)
21562 /* We need to represent that SI and DI registers are clobbered
21563 by SYSV calls.  */
21564 static int clobbered_registers[] = {
21565 XMM6_REG, XMM7_REG, XMM8_REG,
21566 XMM9_REG, XMM10_REG, XMM11_REG,
21567 XMM12_REG, XMM13_REG, XMM14_REG,
21568 XMM15_REG, SI_REG, DI_REG
21570 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
21571 rtx use = NULL, call;
21572 unsigned int vec_len;
21574 if (pop == const0_rtx)
21575 pop = NULL;
21576 gcc_assert (!TARGET_64BIT || !pop);
21578 if (TARGET_MACHO && !TARGET_64BIT)
21581 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21582 fnaddr = machopic_indirect_call_target (fnaddr);
21587 /* Static functions and indirect calls don't need the pic register. */
21588 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21589 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21590 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21591 use_reg (&use, pic_offset_table_rtx);
21594 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21596 rtx al = gen_rtx_REG (QImode, AX_REG);
21597 emit_move_insn (al, callarg2);
21598 use_reg (&use, al);
21601 if (ix86_cmodel == CM_LARGE_PIC
21603 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21604 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21605 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21607 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21608 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21610 fnaddr = XEXP (fnaddr, 0);
21611 if (GET_MODE (fnaddr) != Pmode)
21612 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
21613 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
21617 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21619 call = gen_rtx_SET (VOIDmode, retval, call);
21620 vec[vec_len++] = call;
21624 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21625 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21626 vec[vec_len++] = pop;
21629 if (TARGET_64BIT_MS_ABI
21630 && (!callarg2 || INTVAL (callarg2) != -2))
21634 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21635 UNSPEC_MS_TO_SYSV_CALL);
21637 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21639 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21641 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
21643 clobbered_registers[i]));
21646 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21647 if (TARGET_VZEROUPPER)
21650 if (cfun->machine->callee_pass_avx256_p)
21652 if (cfun->machine->callee_return_avx256_p)
21653 avx256 = callee_return_pass_avx256;
21655 avx256 = callee_pass_avx256;
21657 else if (cfun->machine->callee_return_avx256_p)
21658 avx256 = callee_return_avx256;
21660 avx256 = call_no_avx256;
21662 if (reload_completed)
21663 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21665 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
21666 gen_rtvec (1, GEN_INT (avx256)),
21667 UNSPEC_CALL_NEEDS_VZEROUPPER);
21671 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
21672 call = emit_call_insn (call);
21674 CALL_INSN_FUNCTION_USAGE (call) = use;
21680 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21682 rtx pat = PATTERN (insn);
21683 rtvec vec = XVEC (pat, 0);
21684 int len = GET_NUM_ELEM (vec) - 1;
21686 /* Strip off the last entry of the parallel. */
21687 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
21688 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
21690 pat = RTVEC_ELT (vec, 0);
21692 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
21694 emit_insn (gen_avx_vzeroupper (vzeroupper));
21695 emit_call_insn (pat);
21698 /* Output the assembly for a call instruction. */
21701 ix86_output_call_insn (rtx insn, rtx call_op)
21703 bool direct_p = constant_call_address_operand (call_op, Pmode);
21704 bool seh_nop_p = false;
21707 if (SIBLING_CALL_P (insn))
21711 /* SEH epilogue detection requires the indirect branch case
21712 to include REX.W. */
21713 else if (TARGET_SEH)
21714 xasm = "rex.W jmp %A0";
21718 output_asm_insn (xasm, &call_op);
21722 /* SEH unwinding can require an extra nop to be emitted in several
21723 circumstances. Determine if we have one of those. */
21728 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21730 /* If we get to another real insn, we don't need the nop. */
21734 /* If we get to the epilogue note, prevent a catch region from
21735 being adjacent to the standard epilogue sequence. If non-
21736 call-exceptions, we'll have done this during epilogue emission. */
21737 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21738 && !flag_non_call_exceptions
21739 && !can_throw_internal (insn))
21746 /* If we didn't find a real insn following the call, prevent the
21747 unwinder from looking into the next function. */
21753 xasm = "call\t%P0";
21755 xasm = "call\t%A0";
21757 output_asm_insn (xasm, &call_op);
21765 /* Clear stack slot assignments remembered from previous functions.
21766 This is called from INIT_EXPANDERS once before RTL is emitted for each
21767 function.  */
21769 static struct machine_function *
21770 ix86_init_machine_status (void)
21772 struct machine_function *f;
21774 f = ggc_alloc_cleared_machine_function ();
21775 f->use_fast_prologue_epilogue_nregs = -1;
21776 f->tls_descriptor_call_expanded_p = 0;
21777 f->call_abi = ix86_abi;
21782 /* Return a MEM corresponding to a stack slot with mode MODE.
21783 Allocate a new slot if necessary.
21785 The RTL for a function can have several slots available: N is
21786 which slot to use. */
21789 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
21791 struct stack_local_entry *s;
21793 gcc_assert (n < MAX_386_STACK_LOCALS);
21795 /* Virtual slot is valid only before vregs are instantiated. */
21796 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
21798 for (s = ix86_stack_locals; s; s = s->next)
21799 if (s->mode == mode && s->n == n)
21800 return validize_mem (copy_rtx (s->rtl));
21802 s = ggc_alloc_stack_local_entry ();
21805 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21807 s->next = ix86_stack_locals;
21808 ix86_stack_locals = s;
21809 return validize_mem (s->rtl);
21812 /* Calculate the length of the memory address in the instruction encoding.
21813 Includes addr32 prefix, does not include the one-byte modrm, opcode,
21814 or other prefixes. */
21817 memory_address_length (rtx addr)
21819 struct ix86_address parts;
21820 rtx base, index, disp;
21824 if (GET_CODE (addr) == PRE_DEC
21825 || GET_CODE (addr) == POST_INC
21826 || GET_CODE (addr) == PRE_MODIFY
21827 || GET_CODE (addr) == POST_MODIFY)
21830 ok = ix86_decompose_address (addr, &parts);
21833 if (parts.base && GET_CODE (parts.base) == SUBREG)
21834 parts.base = SUBREG_REG (parts.base);
21835 if (parts.index && GET_CODE (parts.index) == SUBREG)
21836 parts.index = SUBREG_REG (parts.index);
21839 index = parts.index;
21842 /* Add length of addr32 prefix. */
21843 len = (GET_CODE (addr) == ZERO_EXTEND
21844 || GET_CODE (addr) == AND);
21846 /* Rule of thumb:
21847 - esp as the base always wants an index,
21848 - ebp as the base always wants a displacement,
21849 - r12 as the base always wants an index,
21850 - r13 as the base always wants a displacement. */
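/* Editorial examples (standard x86 encodings): "mov (%esp), %eax" needs a
   SIB byte (8b 04 24) and "mov (%ebp), %eax" needs a disp8 (8b 45 00);
   the cases below account for that extra byte.  */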
21852 /* Register Indirect. */
21853 if (base && !index && !disp)
21855 /* esp (for its index) and ebp (for its displacement) need
21856 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
21857 mode.  */
21858 if (REG_P (addr)
21859 && (addr == arg_pointer_rtx
21860 || addr == frame_pointer_rtx
21861 || REGNO (addr) == SP_REG
21862 || REGNO (addr) == BP_REG
21863 || REGNO (addr) == R12_REG
21864 || REGNO (addr) == R13_REG))
21868 /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
21869 is not disp32, but disp32(%rip), so for disp32
21870 SIB byte is needed, unless print_operand_address
21871 optimizes it into disp32(%rip) or (%rip) is implied
21872 by UNSPEC.  */
21873 else if (disp && !base && !index)
21880 if (GET_CODE (disp) == CONST)
21881 symbol = XEXP (disp, 0);
21882 if (GET_CODE (symbol) == PLUS
21883 && CONST_INT_P (XEXP (symbol, 1)))
21884 symbol = XEXP (symbol, 0);
21886 if (GET_CODE (symbol) != LABEL_REF
21887 && (GET_CODE (symbol) != SYMBOL_REF
21888 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21889 && (GET_CODE (symbol) != UNSPEC
21890 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21891 && XINT (symbol, 1) != UNSPEC_PCREL
21892 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21899 /* Find the length of the displacement constant. */
21902 if (base && satisfies_constraint_K (disp))
21907 /* ebp always wants a displacement. Similarly r13. */
21908 else if (base && REG_P (base)
21909 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21912 /* An index requires the two-byte modrm form.... */
21914 /* ...like esp (or r12), which always wants an index. */
21915 || base == arg_pointer_rtx
21916 || base == frame_pointer_rtx
21917 || (base && REG_P (base)
21918 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
21935 /* Compute the default value for the "length_immediate" attribute.  When SHORTFORM
21936 is set, expect that the insn has an 8-bit immediate alternative.  */
21938 ix86_attr_length_immediate_default (rtx insn, bool shortform)
21942 extract_insn_cached (insn);
21943 for (i = recog_data.n_operands - 1; i >= 0; --i)
21944 if (CONSTANT_P (recog_data.operand[i]))
21946 enum attr_mode mode = get_attr_mode (insn);
21949 if (shortform && CONST_INT_P (recog_data.operand[i]))
21951 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21958 ival = trunc_int_for_mode (ival, HImode);
21961 ival = trunc_int_for_mode (ival, SImode);
21966 if (IN_RANGE (ival, -128, 127))
21983 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
21988 fatal_insn ("unknown insn mode", insn);
21993 /* Compute default value for "length_address" attribute. */
21995 ix86_attr_length_address_default (rtx insn)
21999 if (get_attr_type (insn) == TYPE_LEA)
22001 rtx set = PATTERN (insn), addr;
22003 if (GET_CODE (set) == PARALLEL)
22004 set = XVECEXP (set, 0, 0);
22006 gcc_assert (GET_CODE (set) == SET);
22008 addr = SET_SRC (set);
22009 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22011 if (GET_CODE (addr) == ZERO_EXTEND)
22012 addr = XEXP (addr, 0);
22013 if (GET_CODE (addr) == SUBREG)
22014 addr = SUBREG_REG (addr);
22017 return memory_address_length (addr);
22020 extract_insn_cached (insn);
22021 for (i = recog_data.n_operands - 1; i >= 0; --i)
22022 if (MEM_P (recog_data.operand[i]))
22024 constrain_operands_cached (reload_completed);
22025 if (which_alternative != -1)
22027 const char *constraints = recog_data.constraints[i];
22028 int alt = which_alternative;
22030 while (*constraints == '=' || *constraints == '+')
22033 while (*constraints++ != ',')
22035 /* Skip ignored operands. */
22036 if (*constraints == 'X')
22039 return memory_address_length (XEXP (recog_data.operand[i], 0));
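/* Illustrative only, and compiled out: a standalone sketch of the
   constraint-string walk above.  It skips the '='/'+' modifiers, then
   skips ALT comma-separated alternatives to land on the constraint letter
   for the chosen alternative ('X' meaning the operand is ignored).  It
   assumes a well-formed constraint string with at least ALT+1
   alternatives; not part of GCC.  */
#if 0
static char
constraint_for_alternative (const char *constraints, int alt)
{
  while (*constraints == '=' || *constraints == '+')
    constraints++;
  while (alt-- > 0)
    while (*constraints++ != ',')
      ;
  return *constraints;
}
#endif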
22044 /* Compute default value for "length_vex" attribute. It includes
22045 2 or 3 byte VEX prefix and 1 opcode byte. */
22048 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
22052 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
22053 byte VEX prefix. */
22054 if (!has_0f_opcode || has_vex_w)
22057 /* We can always use 2 byte VEX prefix in 32bit. */
22061 extract_insn_cached (insn);
22063 for (i = recog_data.n_operands - 1; i >= 0; --i)
22064 if (REG_P (recog_data.operand[i]))
22066 /* REX.W bit uses 3 byte VEX prefix. */
22067 if (GET_MODE (recog_data.operand[i]) == DImode
22068 && GENERAL_REG_P (recog_data.operand[i]))
22073 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22074 if (MEM_P (recog_data.operand[i])
22075 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
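/* Illustrative only, and compiled out: the prefix-size decision the
   function above implements.  The 2-byte VEX prefix (0xC5) can only
   select the 0F opcode map and cannot encode VEX.W or the REX.X/REX.B
   register-extension bits, so any of those forces the 3-byte form (0xC4).
   Standalone sketch, not part of GCC.  */
#if 0
#include <stdbool.h>

static int
vex_prefix_bytes (bool map_is_0f, bool need_w, bool need_rex_x_or_b)
{
  if (!map_is_0f || need_w || need_rex_x_or_b)
    return 3;   /* 0xC4 xx xx */
  return 2;     /* 0xC5 xx */
}
#endif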
22082 /* Return the maximum number of instructions a cpu can issue. */
22085 ix86_issue_rate (void)
22089 case PROCESSOR_PENTIUM:
22090 case PROCESSOR_ATOM:
22094 case PROCESSOR_PENTIUMPRO:
22095 case PROCESSOR_PENTIUM4:
22096 case PROCESSOR_CORE2_32:
22097 case PROCESSOR_CORE2_64:
22098 case PROCESSOR_COREI7_32:
22099 case PROCESSOR_COREI7_64:
22100 case PROCESSOR_ATHLON:
22102 case PROCESSOR_AMDFAM10:
22103 case PROCESSOR_NOCONA:
22104 case PROCESSOR_GENERIC32:
22105 case PROCESSOR_GENERIC64:
22106 case PROCESSOR_BDVER1:
22107 case PROCESSOR_BDVER2:
22108 case PROCESSOR_BTVER1:
22116 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
22117 by DEP_INSN and nothing else set by DEP_INSN. */
22120 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22124 /* Simplify the test for uninteresting insns. */
22125 if (insn_type != TYPE_SETCC
22126 && insn_type != TYPE_ICMOV
22127 && insn_type != TYPE_FCMOV
22128 && insn_type != TYPE_IBR)
22131 if ((set = single_set (dep_insn)) != 0)
22133 set = SET_DEST (set);
22136 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22137 && XVECLEN (PATTERN (dep_insn), 0) == 2
22138 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22139 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22141 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22142 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22147 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22150 /* This test is true if the dependent insn reads the flags but
22151 not any other potentially set register. */
22152 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22155 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22161 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN. */
22165 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22168 extract_insn_cached (use_insn);
22169 for (i = recog_data.n_operands - 1; i >= 0; --i)
22170 if (MEM_P (recog_data.operand[i]))
22172 rtx addr = XEXP (recog_data.operand[i], 0);
22173 return modified_in_p (addr, set_insn) != 0;
22179 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22181 enum attr_type insn_type, dep_insn_type;
22182 enum attr_memory memory;
22184 int dep_insn_code_number;
22186 /* Anti and output dependencies have zero cost on all CPUs. */
22187 if (REG_NOTE_KIND (link) != 0)
22190 dep_insn_code_number = recog_memoized (dep_insn);
22192 /* If we can't recognize the insns, we can't really do anything. */
22193 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22196 insn_type = get_attr_type (insn);
22197 dep_insn_type = get_attr_type (dep_insn);
22201 case PROCESSOR_PENTIUM:
22202 /* Address Generation Interlock adds a cycle of latency. */
22203 if (insn_type == TYPE_LEA)
22205 rtx addr = PATTERN (insn);
22207 if (GET_CODE (addr) == PARALLEL)
22208 addr = XVECEXP (addr, 0, 0);
22210 gcc_assert (GET_CODE (addr) == SET);
22212 addr = SET_SRC (addr);
22213 if (modified_in_p (addr, dep_insn))
22216 else if (ix86_agi_dependent (dep_insn, insn))
22219 /* ??? Compares pair with jump/setcc. */
22220 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22223 /* Floating point stores require value to be ready one cycle earlier. */
22224 if (insn_type == TYPE_FMOV
22225 && get_attr_memory (insn) == MEMORY_STORE
22226 && !ix86_agi_dependent (dep_insn, insn))
22230 case PROCESSOR_PENTIUMPRO:
22231 memory = get_attr_memory (insn);
22233 /* INT->FP conversion is expensive. */
22234 if (get_attr_fp_int_src (dep_insn))
22237 /* There is one cycle extra latency between an FP op and a store. */
22238 if (insn_type == TYPE_FMOV
22239 && (set = single_set (dep_insn)) != NULL_RTX
22240 && (set2 = single_set (insn)) != NULL_RTX
22241 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22242 && MEM_P (SET_DEST (set2)))
22245 /* Show ability of reorder buffer to hide latency of load by executing
22246 in parallel with previous instruction in case
22247 previous instruction is not needed to compute the address. */
22248 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22249 && !ix86_agi_dependent (dep_insn, insn))
22251 /* Claim moves to take one cycle, as the core can issue one load
22252 at a time and the next load can start a cycle later. */
22253 if (dep_insn_type == TYPE_IMOV
22254 || dep_insn_type == TYPE_FMOV)
22262 memory = get_attr_memory (insn);
22264 /* The esp dependency is resolved before the instruction is really finished. */
22266 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22267 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22270 /* INT->FP conversion is expensive. */
22271 if (get_attr_fp_int_src (dep_insn))
22274 /* Show ability of reorder buffer to hide latency of load by executing
22275 in parallel with previous instruction in case
22276 previous instruction is not needed to compute the address. */
22277 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22278 && !ix86_agi_dependent (dep_insn, insn))
22280 /* Claim moves to take one cycle, as the core can issue one load
22281 at a time and the next load can start a cycle later. */
22282 if (dep_insn_type == TYPE_IMOV
22283 || dep_insn_type == TYPE_FMOV)
22292 case PROCESSOR_ATHLON:
22294 case PROCESSOR_AMDFAM10:
22295 case PROCESSOR_BDVER1:
22296 case PROCESSOR_BDVER2:
22297 case PROCESSOR_BTVER1:
22298 case PROCESSOR_ATOM:
22299 case PROCESSOR_GENERIC32:
22300 case PROCESSOR_GENERIC64:
22301 memory = get_attr_memory (insn);
22303 /* Show ability of reorder buffer to hide latency of load by executing
22304 in parallel with previous instruction in case
22305 previous instruction is not needed to compute the address. */
22306 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22307 && !ix86_agi_dependent (dep_insn, insn))
22309 enum attr_unit unit = get_attr_unit (insn);
22312 /* Because of the difference between the length of integer and
22313 floating unit pipeline preparation stages, the memory operands
22314 for floating point are cheaper.
22316 ??? For Athlon the difference is most probably 2. */
22317 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22320 loadcost = TARGET_ATHLON ? 2 : 0;
22322 if (cost >= loadcost)
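/* Illustrative only, and compiled out: the clamp implied by the test
   above.  Up to LOADCOST cycles of the dependent load's latency are
   hidden by the reorder buffer, and the adjusted cost never goes
   negative.  Standalone sketch, not part of GCC.  */
#if 0
static int
hide_load_latency (int cost, int loadcost)
{
  return cost >= loadcost ? cost - loadcost : 0;
}
#endif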
22335 /* How many alternative schedules to try. This should be as wide as the
22336 scheduling freedom in the DFA, but no wider. Making this value too
22337 large results in extra work for the scheduler. */
22340 ia32_multipass_dfa_lookahead (void)
22344 case PROCESSOR_PENTIUM:
22347 case PROCESSOR_PENTIUMPRO:
22351 case PROCESSOR_CORE2_32:
22352 case PROCESSOR_CORE2_64:
22353 case PROCESSOR_COREI7_32:
22354 case PROCESSOR_COREI7_64:
22355 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22356 as the number of instructions that can be executed in one cycle,
22357 i.e., issue_rate. I wonder why tuning for many CPUs does not do this. */
22358 return ix86_issue_rate ();
22367 /* Model decoder of Core 2/i7.
22368 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22369 track the instruction fetch block boundaries and make sure that long
22370 (9+ bytes) instructions are assigned to D0. */
22372 /* Maximum length of an insn that can be handled by
22373 a secondary decoder unit. '8' for Core 2/i7. */
22374 static int core2i7_secondary_decoder_max_insn_size;
22376 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22377 '16' for Core 2/i7. */
22378 static int core2i7_ifetch_block_size;
22380 /* Maximum number of instructions decoder can handle per cycle.
22381 '6' for Core 2/i7. */
22382 static int core2i7_ifetch_block_max_insns;
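/* Illustrative only, and compiled out: the admission test the filter
   below applies to each candidate insn.  It must fit the remaining bytes
   of the 16-byte ifetch block and the 6-insn decode limit, and insns
   longer than 8 bytes may only go to the first (D0) decoder.  The
   constants here restate the parameters set in ix86_sched_init_global
   below; standalone sketch, not part of GCC.  */
#if 0
#include <stdbool.h>

static bool
decoder_can_issue (int block_len, int block_n_insns, int insn_size,
                   bool first_cycle_insn_p)
{
  if (!first_cycle_insn_p && insn_size > 8)   /* secondary decoder limit */
    return false;
  if (block_len + insn_size > 16)             /* ifetch block size */
    return false;
  if (block_n_insns + 1 > 6)                  /* decode width per cycle */
    return false;
  return true;
}
#endif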
22384 typedef struct ix86_first_cycle_multipass_data_ *
22385 ix86_first_cycle_multipass_data_t;
22386 typedef const struct ix86_first_cycle_multipass_data_ *
22387 const_ix86_first_cycle_multipass_data_t;
22389 /* A variable to store target state across calls to max_issue within
22391 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22392 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22394 /* Initialize DATA. */
22396 core2i7_first_cycle_multipass_init (void *_data)
22398 ix86_first_cycle_multipass_data_t data
22399 = (ix86_first_cycle_multipass_data_t) _data;
22401 data->ifetch_block_len = 0;
22402 data->ifetch_block_n_insns = 0;
22403 data->ready_try_change = NULL;
22404 data->ready_try_change_size = 0;
22407 /* Advancing the cycle; reset ifetch block counts. */
22409 core2i7_dfa_post_advance_cycle (void)
22411 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22413 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22415 data->ifetch_block_len = 0;
22416 data->ifetch_block_n_insns = 0;
22419 static int min_insn_size (rtx);
22421 /* Filter out insns from ready_try that the core will not be able to issue
22422 on current cycle due to decoder. */
22424 core2i7_first_cycle_multipass_filter_ready_try
22425 (const_ix86_first_cycle_multipass_data_t data,
22426 char *ready_try, int n_ready, bool first_cycle_insn_p)
22433 if (ready_try[n_ready])
22436 insn = get_ready_element (n_ready);
22437 insn_size = min_insn_size (insn);
22439 if (/* If this is too long an insn for a secondary decoder ... */
22440 (!first_cycle_insn_p
22441 && insn_size > core2i7_secondary_decoder_max_insn_size)
22442 /* ... or it would not fit into the ifetch block ... */
22443 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22444 /* ... or the decoder is full already ... */
22445 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22446 /* ... mask the insn out. */
22448 ready_try[n_ready] = 1;
22450 if (data->ready_try_change)
22451 SET_BIT (data->ready_try_change, n_ready);
22456 /* Prepare for a new round of multipass lookahead scheduling. */
22458 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22459 bool first_cycle_insn_p)
22461 ix86_first_cycle_multipass_data_t data
22462 = (ix86_first_cycle_multipass_data_t) _data;
22463 const_ix86_first_cycle_multipass_data_t prev_data
22464 = ix86_first_cycle_multipass_data;
22466 /* Restore the state from the end of the previous round. */
22467 data->ifetch_block_len = prev_data->ifetch_block_len;
22468 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22470 /* Filter instructions that cannot be issued on current cycle due to
22471 decoder restrictions. */
22472 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22473 first_cycle_insn_p);
22476 /* INSN is being issued in current solution. Account for its impact on
22477 the decoder model. */
22479 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22480 rtx insn, const void *_prev_data)
22482 ix86_first_cycle_multipass_data_t data
22483 = (ix86_first_cycle_multipass_data_t) _data;
22484 const_ix86_first_cycle_multipass_data_t prev_data
22485 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22487 int insn_size = min_insn_size (insn);
22489 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22490 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22491 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22492 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22494 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22495 if (!data->ready_try_change)
22497 data->ready_try_change = sbitmap_alloc (n_ready);
22498 data->ready_try_change_size = n_ready;
22500 else if (data->ready_try_change_size < n_ready)
22502 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22504 data->ready_try_change_size = n_ready;
22506 sbitmap_zero (data->ready_try_change);
22508 /* Filter out insns from ready_try that the core will not be able to issue
22509 on current cycle due to decoder. */
22510 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22514 /* Revert the effect on ready_try. */
22516 core2i7_first_cycle_multipass_backtrack (const void *_data,
22518 int n_ready ATTRIBUTE_UNUSED)
22520 const_ix86_first_cycle_multipass_data_t data
22521 = (const_ix86_first_cycle_multipass_data_t) _data;
22522 unsigned int i = 0;
22523 sbitmap_iterator sbi;
22525 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22526 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22532 /* Save the result of multipass lookahead scheduling for the next round. */
22534 core2i7_first_cycle_multipass_end (const void *_data)
22536 const_ix86_first_cycle_multipass_data_t data
22537 = (const_ix86_first_cycle_multipass_data_t) _data;
22538 ix86_first_cycle_multipass_data_t next_data
22539 = ix86_first_cycle_multipass_data;
22543 next_data->ifetch_block_len = data->ifetch_block_len;
22544 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22548 /* Deallocate target data. */
22550 core2i7_first_cycle_multipass_fini (void *_data)
22552 ix86_first_cycle_multipass_data_t data
22553 = (ix86_first_cycle_multipass_data_t) _data;
22555 if (data->ready_try_change)
22557 sbitmap_free (data->ready_try_change);
22558 data->ready_try_change = NULL;
22559 data->ready_try_change_size = 0;
22563 /* Prepare for scheduling pass. */
22565 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22566 int verbose ATTRIBUTE_UNUSED,
22567 int max_uid ATTRIBUTE_UNUSED)
22569 /* Install scheduling hooks for current CPU. Some of these hooks are used
22570 in time-critical parts of the scheduler, so we only set them up when
22571 they are actually used. */
22574 case PROCESSOR_CORE2_32:
22575 case PROCESSOR_CORE2_64:
22576 case PROCESSOR_COREI7_32:
22577 case PROCESSOR_COREI7_64:
22578 targetm.sched.dfa_post_advance_cycle
22579 = core2i7_dfa_post_advance_cycle;
22580 targetm.sched.first_cycle_multipass_init
22581 = core2i7_first_cycle_multipass_init;
22582 targetm.sched.first_cycle_multipass_begin
22583 = core2i7_first_cycle_multipass_begin;
22584 targetm.sched.first_cycle_multipass_issue
22585 = core2i7_first_cycle_multipass_issue;
22586 targetm.sched.first_cycle_multipass_backtrack
22587 = core2i7_first_cycle_multipass_backtrack;
22588 targetm.sched.first_cycle_multipass_end
22589 = core2i7_first_cycle_multipass_end;
22590 targetm.sched.first_cycle_multipass_fini
22591 = core2i7_first_cycle_multipass_fini;
22593 /* Set decoder parameters. */
22594 core2i7_secondary_decoder_max_insn_size = 8;
22595 core2i7_ifetch_block_size = 16;
22596 core2i7_ifetch_block_max_insns = 6;
22600 targetm.sched.dfa_post_advance_cycle = NULL;
22601 targetm.sched.first_cycle_multipass_init = NULL;
22602 targetm.sched.first_cycle_multipass_begin = NULL;
22603 targetm.sched.first_cycle_multipass_issue = NULL;
22604 targetm.sched.first_cycle_multipass_backtrack = NULL;
22605 targetm.sched.first_cycle_multipass_end = NULL;
22606 targetm.sched.first_cycle_multipass_fini = NULL;
22612 /* Compute the alignment given to a constant that is being placed in memory.
22613 EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
22615 The value of this function is used instead of that alignment to align the object. */
22619 ix86_constant_alignment (tree exp, int align)
22621 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22622 || TREE_CODE (exp) == INTEGER_CST)
22624 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22626 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22629 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22630 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22631 return BITS_PER_WORD;
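/* Illustrative only, and compiled out: the effect of the rules above,
   reduced to plain arithmetic on alignment values in bits.  The
   parameters are hypothetical stand-ins for the mode and string checks
   in ix86_constant_alignment, and the word size is assumed to be 32
   bits.  Standalone sketch, not part of GCC.  */
#if 0
#include <stdbool.h>

static int
const_align_bits (bool is_double, bool is_128bit_mode,
                  long string_len, int align)
{
  if (is_double && align < 64)
    return 64;                   /* DFmode constants bumped to 64 */
  if (is_128bit_mode && align < 128)
    return 128;                  /* ALIGN_MODE_128 constants to 128 */
  if (string_len >= 31 && align < 32)
    return 32;                   /* long strings to word alignment */
  return align;
}
#endif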
22636 /* Compute the alignment for a static variable.
22637 TYPE is the data type, and ALIGN is the alignment that
22638 the object would ordinarily have. The value of this function is used
22639 instead of that alignment to align the object. */
22642 ix86_data_alignment (tree type, int align)
22644 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22646 if (AGGREGATE_TYPE_P (type)
22647 && TYPE_SIZE (type)
22648 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22649 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22650 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22651 && align < max_align)
22654 /* x86-64 ABI requires arrays of at least 16 bytes to be aligned
22655 to a 16byte boundary. */
22658 if (AGGREGATE_TYPE_P (type)
22659 && TYPE_SIZE (type)
22660 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22661 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22662 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22666 if (TREE_CODE (type) == ARRAY_TYPE)
22668 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22670 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22673 else if (TREE_CODE (type) == COMPLEX_TYPE)
22676 if (TYPE_MODE (type) == DCmode && align < 64)
22678 if ((TYPE_MODE (type) == XCmode
22679 || TYPE_MODE (type) == TCmode) && align < 128)
22682 else if ((TREE_CODE (type) == RECORD_TYPE
22683 || TREE_CODE (type) == UNION_TYPE
22684 || TREE_CODE (type) == QUAL_UNION_TYPE)
22685 && TYPE_FIELDS (type))
22687 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22689 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22692 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22693 || TREE_CODE (type) == INTEGER_TYPE)
22695 if (TYPE_MODE (type) == DFmode && align < 64)
22697 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
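/* Illustrative only, and compiled out: the x86-64 ABI array rule applied
   by the code above, in isolation.  An aggregate of at least 16 bytes is
   raised to 128-bit alignment so that aligned SSE accesses are possible.
   Standalone sketch, not part of GCC.  */
#if 0
static int
abi_array_align_bits (unsigned long size_bytes, int align)
{
  if (size_bytes >= 16 && align < 128)
    return 128;
  return align;
}
#endif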
22704 /* Compute the alignment for a local variable or a stack slot. EXP is
22705 the data type or decl itself, MODE is the widest mode available and
22706 ALIGN is the alignment that the object would ordinarily have. The
22707 value of this macro is used instead of that alignment to align the object. */
22711 ix86_local_alignment (tree exp, enum machine_mode mode,
22712 unsigned int align)
22716 if (exp && DECL_P (exp))
22718 type = TREE_TYPE (exp);
22727 /* Don't do dynamic stack realignment for long long objects with
22728 -mpreferred-stack-boundary=2. */
22731 && ix86_preferred_stack_boundary < 64
22732 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22733 && (!type || !TYPE_USER_ALIGN (type))
22734 && (!decl || !DECL_USER_ALIGN (decl)))
22737 /* If TYPE is NULL, we are allocating a stack slot for caller-save
22738 register in MODE. We will return the largest alignment of XF and DF. */
22742 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22743 align = GET_MODE_ALIGNMENT (DFmode);
22747 /* x86-64 ABI requires arrays of at least 16 bytes to be aligned
22748 to a 16byte boundary. Exact wording is:
22750 An array uses the same alignment as its elements, except that a local or
22751 global array variable of length at least 16 bytes or
22752 a C99 variable-length array variable always has alignment of at least 16 bytes.
22754 This was added to allow use of aligned SSE instructions on arrays. This
22755 rule is meant for static storage (where the compiler cannot do the analysis
22756 by itself). We follow it for automatic variables only when convenient.
22757 We fully control everything in the function being compiled, and functions
22758 from other units cannot rely on the alignment.
22760 Exclude the va_list type. It is the common case of a local array where
22761 we cannot benefit from the alignment. */
22762 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22765 if (AGGREGATE_TYPE_P (type)
22766 && (va_list_type_node == NULL_TREE
22767 || (TYPE_MAIN_VARIANT (type)
22768 != TYPE_MAIN_VARIANT (va_list_type_node)))
22769 && TYPE_SIZE (type)
22770 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22771 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
22772 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22775 if (TREE_CODE (type) == ARRAY_TYPE)
22777 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22779 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22782 else if (TREE_CODE (type) == COMPLEX_TYPE)
22784 if (TYPE_MODE (type) == DCmode && align < 64)
22786 if ((TYPE_MODE (type) == XCmode
22787 || TYPE_MODE (type) == TCmode) && align < 128)
22790 else if ((TREE_CODE (type) == RECORD_TYPE
22791 || TREE_CODE (type) == UNION_TYPE
22792 || TREE_CODE (type) == QUAL_UNION_TYPE)
22793 && TYPE_FIELDS (type))
22795 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22797 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22800 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22801 || TREE_CODE (type) == INTEGER_TYPE)
22804 if (TYPE_MODE (type) == DFmode && align < 64)
22806 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22812 /* Compute the minimum required alignment for dynamic stack realignment
22813 purposes for a local variable, parameter or a stack slot. EXP is
22814 the data type or decl itself, MODE is its mode and ALIGN is the
22815 alignment that the object would ordinarily have. */
22818 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22819 unsigned int align)
22823 if (exp && DECL_P (exp))
22825 type = TREE_TYPE (exp);
22834 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22837 /* Don't do dynamic stack realignment for long long objects with
22838 -mpreferred-stack-boundary=2. */
22839 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22840 && (!type || !TYPE_USER_ALIGN (type))
22841 && (!decl || !DECL_USER_ALIGN (decl)))
22847 /* Find a location for the static chain incoming to a nested function.
22848 This is a register, unless all free registers are used by arguments. */
22851 ix86_static_chain (const_tree fndecl, bool incoming_p)
22855 if (!DECL_STATIC_CHAIN (fndecl))
22860 /* We always use R10 in 64-bit mode. */
22868 /* By default in 32-bit mode we use ECX to pass the static chain. */
22871 fntype = TREE_TYPE (fndecl);
22872 ccvt = ix86_get_callcvt (fntype);
22873 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
22875 /* Fastcall functions use ecx/edx for arguments, which leaves
22876 us with EAX for the static chain.
22877 Thiscall functions use ecx for arguments, which also
22878 leaves us with EAX for the static chain. */
22881 else if (ix86_function_regparm (fntype, fndecl) == 3)
22883 /* For regparm 3, we have no free call-clobbered registers in
22884 which to store the static chain. In order to implement this,
22885 we have the trampoline push the static chain to the stack.
22886 However, we can't push a value below the return address when
22887 we call the nested function directly, so we have to use an
22888 alternate entry point. For this we use ESI, and have the
22889 alternate entry point push ESI, so that things appear the
22890 same once we're executing the nested function. */
22893 if (fndecl == current_function_decl)
22894 ix86_static_chain_on_stack = true;
22895 return gen_frame_mem (SImode,
22896 plus_constant (arg_pointer_rtx, -8));
22902 return gen_rtx_REG (Pmode, regno);
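/* Illustrative only, and compiled out: a compressed restatement of the
   register-selection logic above, using a hypothetical enum in place of
   RTL.  Standalone sketch, not part of GCC.  */
#if 0
#include <stdbool.h>

enum chain_loc { CHAIN_R10, CHAIN_ECX, CHAIN_EAX, CHAIN_STACK_VIA_ESI };

static enum chain_loc
pick_static_chain (bool is_64bit, bool is_fastcall_or_thiscall, int regparm)
{
  if (is_64bit)
    return CHAIN_R10;             /* always R10 in 64-bit mode */
  if (is_fastcall_or_thiscall)
    return CHAIN_EAX;             /* ecx (and edx) taken by arguments */
  if (regparm == 3)
    return CHAIN_STACK_VIA_ESI;   /* no free call-clobbered register */
  return CHAIN_ECX;               /* default 32-bit convention */
}
#endif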
22905 /* Emit RTL insns to initialize the variable parts of a trampoline.
22906 FNDECL is the decl of the target address; M_TRAMP is a MEM for
22907 the trampoline, and CHAIN_VALUE is an RTX for the static chain
22908 to be passed to the target function. */
22911 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
22917 fnaddr = XEXP (DECL_RTL (fndecl), 0);
22923 /* Load the function address to r11. Try to load address using
22924 the shorter movl instead of movabs. We may want to support
22925 movq for kernel mode, but the kernel does not use trampolines at the moment. */
22927 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
22929 fnaddr = copy_to_mode_reg (DImode, fnaddr);
22931 mem = adjust_address (m_tramp, HImode, offset);
22932 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
22934 mem = adjust_address (m_tramp, SImode, offset + 2);
22935 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
22940 mem = adjust_address (m_tramp, HImode, offset);
22941 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
22943 mem = adjust_address (m_tramp, DImode, offset + 2);
22944 emit_move_insn (mem, fnaddr);
22948 /* Load static chain using movabs to r10. Use the
22949 shorter movl instead of movabs for x32. */
22961 mem = adjust_address (m_tramp, HImode, offset);
22962 emit_move_insn (mem, gen_int_mode (opcode, HImode));
22964 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
22965 emit_move_insn (mem, chain_value);
22968 /* Jump to r11; the last (unused) byte is a nop, only there to
22969 pad the write out to a single 32-bit store. */
22970 mem = adjust_address (m_tramp, SImode, offset);
22971 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
22978 /* Depending on the static chain location, either load a register
22979 with a constant, or push the constant to the stack. All of the
22980 instructions are the same size. */
22981 chain = ix86_static_chain (fndecl, true);
22984 switch (REGNO (chain))
22987 opcode = 0xb8; break;
22989 opcode = 0xb9; break;
22991 gcc_unreachable ();
22997 mem = adjust_address (m_tramp, QImode, offset);
22998 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23000 mem = adjust_address (m_tramp, SImode, offset + 1);
23001 emit_move_insn (mem, chain_value);
23004 mem = adjust_address (m_tramp, QImode, offset);
23005 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23007 mem = adjust_address (m_tramp, SImode, offset + 1);
23009 /* Compute offset from the end of the jmp to the target function.
23010 In the case in which the trampoline stores the static chain on
23011 the stack, we need to skip the first insn which pushes the
23012 (call-saved) register static chain; this push is 1 byte. */
23014 disp = expand_binop (SImode, sub_optab, fnaddr,
23015 plus_constant (XEXP (m_tramp, 0),
23016 offset - (MEM_P (chain) ? 1 : 0)),
23017 NULL_RTX, 1, OPTAB_DIRECT);
23018 emit_move_insn (mem, disp);
23021 gcc_assert (offset <= TRAMPOLINE_SIZE);
23023 #ifdef HAVE_ENABLE_EXECUTE_STACK
23024 #ifdef CHECK_EXECUTE_STACK_ENABLED
23025 if (CHECK_EXECUTE_STACK_ENABLED)
23027 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23028 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
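/* Illustrative only, and compiled out: the 64-bit trampoline bytes
   emitted above, spelled out.  The HImode stores are little-endian, so
   e.g. the 0xbb49 constant lands in memory as 0x49 0xBB (movabs
   $imm64, %r11), and 0x90e3ff49 becomes 0x49 0xFF 0xE3 0x90 (jmp *%r11
   plus a padding nop).  This sketch assumes the full-width movabs form
   for the static chain (the x32 short form is handled separately above)
   and a little-endian host; not part of GCC.  */
#if 0
#include <stdint.h>
#include <string.h>

static void
write_tramp64 (uint8_t *t, uint64_t fnaddr, uint64_t chain)
{
  int o = 0;
  t[o++] = 0x49; t[o++] = 0xBB;                 /* movabs $fnaddr, %r11 */
  memcpy (t + o, &fnaddr, 8); o += 8;
  t[o++] = 0x49; t[o++] = 0xBA;                 /* movabs $chain, %r10 */
  memcpy (t + o, &chain, 8); o += 8;
  t[o++] = 0x49; t[o++] = 0xFF; t[o++] = 0xE3;  /* jmp *%r11 */
  t[o++] = 0x90;                                /* nop: pad to a 32-bit store */
}
#endif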
23032 /* The following file contains several enumerations and data structures
23033 built from the definitions in i386-builtin-types.def. */
23035 #include "i386-builtin-types.inc"
23037 /* Table for the ix86 builtin non-function types. */
23038 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23040 /* Retrieve an element from the above table, building some of
23041 the types lazily. */
23044 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23046 unsigned int index;
23049 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23051 type = ix86_builtin_type_tab[(int) tcode];
23055 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23056 if (tcode <= IX86_BT_LAST_VECT)
23058 enum machine_mode mode;
23060 index = tcode - IX86_BT_LAST_PRIM - 1;
23061 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23062 mode = ix86_builtin_type_vect_mode[index];
23064 type = build_vector_type_for_mode (itype, mode);
23070 index = tcode - IX86_BT_LAST_VECT - 1;
23071 if (tcode <= IX86_BT_LAST_PTR)
23072 quals = TYPE_UNQUALIFIED;
23074 quals = TYPE_QUAL_CONST;
23076 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23077 if (quals != TYPE_UNQUALIFIED)
23078 itype = build_qualified_type (itype, quals);
23080 type = build_pointer_type (itype);
23083 ix86_builtin_type_tab[(int) tcode] = type;
23087 /* Table for the ix86 builtin function types. */
23088 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23090 /* Retrieve an element from the above table, building some of
23091 the types lazily. */
23094 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23098 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23100 type = ix86_builtin_func_type_tab[(int) tcode];
23104 if (tcode <= IX86_BT_LAST_FUNC)
23106 unsigned start = ix86_builtin_func_start[(int) tcode];
23107 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23108 tree rtype, atype, args = void_list_node;
23111 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23112 for (i = after - 1; i > start; --i)
23114 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23115 args = tree_cons (NULL, atype, args);
23118 type = build_function_type (rtype, args);
23122 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23123 enum ix86_builtin_func_type icode;
23125 icode = ix86_builtin_func_alias_base[index];
23126 type = ix86_get_builtin_func_type (icode);
23129 ix86_builtin_func_type_tab[(int) tcode] = type;
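/* Illustrative only, and compiled out: the memoization pattern shared by
   both lookup functions above.  A table indexed by type code is filled
   on first use and returned verbatim afterwards.  The build_type
   callback and table size are hypothetical; standalone sketch, not part
   of GCC.  */
#if 0
#include <stddef.h>

#define N_CODES 128
static void *type_tab[N_CODES];

static void *
get_type (int code, void *(*build_type) (int))
{
  if (type_tab[code] == NULL)
    type_tab[code] = build_type (code);   /* built lazily, exactly once */
  return type_tab[code];
}
#endif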
23134 /* Codes for all the SSE/MMX builtins. */
23137 IX86_BUILTIN_ADDPS,
23138 IX86_BUILTIN_ADDSS,
23139 IX86_BUILTIN_DIVPS,
23140 IX86_BUILTIN_DIVSS,
23141 IX86_BUILTIN_MULPS,
23142 IX86_BUILTIN_MULSS,
23143 IX86_BUILTIN_SUBPS,
23144 IX86_BUILTIN_SUBSS,
23146 IX86_BUILTIN_CMPEQPS,
23147 IX86_BUILTIN_CMPLTPS,
23148 IX86_BUILTIN_CMPLEPS,
23149 IX86_BUILTIN_CMPGTPS,
23150 IX86_BUILTIN_CMPGEPS,
23151 IX86_BUILTIN_CMPNEQPS,
23152 IX86_BUILTIN_CMPNLTPS,
23153 IX86_BUILTIN_CMPNLEPS,
23154 IX86_BUILTIN_CMPNGTPS,
23155 IX86_BUILTIN_CMPNGEPS,
23156 IX86_BUILTIN_CMPORDPS,
23157 IX86_BUILTIN_CMPUNORDPS,
23158 IX86_BUILTIN_CMPEQSS,
23159 IX86_BUILTIN_CMPLTSS,
23160 IX86_BUILTIN_CMPLESS,
23161 IX86_BUILTIN_CMPNEQSS,
23162 IX86_BUILTIN_CMPNLTSS,
23163 IX86_BUILTIN_CMPNLESS,
23164 IX86_BUILTIN_CMPNGTSS,
23165 IX86_BUILTIN_CMPNGESS,
23166 IX86_BUILTIN_CMPORDSS,
23167 IX86_BUILTIN_CMPUNORDSS,
23169 IX86_BUILTIN_COMIEQSS,
23170 IX86_BUILTIN_COMILTSS,
23171 IX86_BUILTIN_COMILESS,
23172 IX86_BUILTIN_COMIGTSS,
23173 IX86_BUILTIN_COMIGESS,
23174 IX86_BUILTIN_COMINEQSS,
23175 IX86_BUILTIN_UCOMIEQSS,
23176 IX86_BUILTIN_UCOMILTSS,
23177 IX86_BUILTIN_UCOMILESS,
23178 IX86_BUILTIN_UCOMIGTSS,
23179 IX86_BUILTIN_UCOMIGESS,
23180 IX86_BUILTIN_UCOMINEQSS,
23182 IX86_BUILTIN_CVTPI2PS,
23183 IX86_BUILTIN_CVTPS2PI,
23184 IX86_BUILTIN_CVTSI2SS,
23185 IX86_BUILTIN_CVTSI642SS,
23186 IX86_BUILTIN_CVTSS2SI,
23187 IX86_BUILTIN_CVTSS2SI64,
23188 IX86_BUILTIN_CVTTPS2PI,
23189 IX86_BUILTIN_CVTTSS2SI,
23190 IX86_BUILTIN_CVTTSS2SI64,
23192 IX86_BUILTIN_MAXPS,
23193 IX86_BUILTIN_MAXSS,
23194 IX86_BUILTIN_MINPS,
23195 IX86_BUILTIN_MINSS,
23197 IX86_BUILTIN_LOADUPS,
23198 IX86_BUILTIN_STOREUPS,
23199 IX86_BUILTIN_MOVSS,
23201 IX86_BUILTIN_MOVHLPS,
23202 IX86_BUILTIN_MOVLHPS,
23203 IX86_BUILTIN_LOADHPS,
23204 IX86_BUILTIN_LOADLPS,
23205 IX86_BUILTIN_STOREHPS,
23206 IX86_BUILTIN_STORELPS,
23208 IX86_BUILTIN_MASKMOVQ,
23209 IX86_BUILTIN_MOVMSKPS,
23210 IX86_BUILTIN_PMOVMSKB,
23212 IX86_BUILTIN_MOVNTPS,
23213 IX86_BUILTIN_MOVNTQ,
23215 IX86_BUILTIN_LOADDQU,
23216 IX86_BUILTIN_STOREDQU,
23218 IX86_BUILTIN_PACKSSWB,
23219 IX86_BUILTIN_PACKSSDW,
23220 IX86_BUILTIN_PACKUSWB,
23222 IX86_BUILTIN_PADDB,
23223 IX86_BUILTIN_PADDW,
23224 IX86_BUILTIN_PADDD,
23225 IX86_BUILTIN_PADDQ,
23226 IX86_BUILTIN_PADDSB,
23227 IX86_BUILTIN_PADDSW,
23228 IX86_BUILTIN_PADDUSB,
23229 IX86_BUILTIN_PADDUSW,
23230 IX86_BUILTIN_PSUBB,
23231 IX86_BUILTIN_PSUBW,
23232 IX86_BUILTIN_PSUBD,
23233 IX86_BUILTIN_PSUBQ,
23234 IX86_BUILTIN_PSUBSB,
23235 IX86_BUILTIN_PSUBSW,
23236 IX86_BUILTIN_PSUBUSB,
23237 IX86_BUILTIN_PSUBUSW,
23240 IX86_BUILTIN_PANDN,
23244 IX86_BUILTIN_PAVGB,
23245 IX86_BUILTIN_PAVGW,
23247 IX86_BUILTIN_PCMPEQB,
23248 IX86_BUILTIN_PCMPEQW,
23249 IX86_BUILTIN_PCMPEQD,
23250 IX86_BUILTIN_PCMPGTB,
23251 IX86_BUILTIN_PCMPGTW,
23252 IX86_BUILTIN_PCMPGTD,
23254 IX86_BUILTIN_PMADDWD,
23256 IX86_BUILTIN_PMAXSW,
23257 IX86_BUILTIN_PMAXUB,
23258 IX86_BUILTIN_PMINSW,
23259 IX86_BUILTIN_PMINUB,
23261 IX86_BUILTIN_PMULHUW,
23262 IX86_BUILTIN_PMULHW,
23263 IX86_BUILTIN_PMULLW,
23265 IX86_BUILTIN_PSADBW,
23266 IX86_BUILTIN_PSHUFW,
23268 IX86_BUILTIN_PSLLW,
23269 IX86_BUILTIN_PSLLD,
23270 IX86_BUILTIN_PSLLQ,
23271 IX86_BUILTIN_PSRAW,
23272 IX86_BUILTIN_PSRAD,
23273 IX86_BUILTIN_PSRLW,
23274 IX86_BUILTIN_PSRLD,
23275 IX86_BUILTIN_PSRLQ,
23276 IX86_BUILTIN_PSLLWI,
23277 IX86_BUILTIN_PSLLDI,
23278 IX86_BUILTIN_PSLLQI,
23279 IX86_BUILTIN_PSRAWI,
23280 IX86_BUILTIN_PSRADI,
23281 IX86_BUILTIN_PSRLWI,
23282 IX86_BUILTIN_PSRLDI,
23283 IX86_BUILTIN_PSRLQI,
23285 IX86_BUILTIN_PUNPCKHBW,
23286 IX86_BUILTIN_PUNPCKHWD,
23287 IX86_BUILTIN_PUNPCKHDQ,
23288 IX86_BUILTIN_PUNPCKLBW,
23289 IX86_BUILTIN_PUNPCKLWD,
23290 IX86_BUILTIN_PUNPCKLDQ,
23292 IX86_BUILTIN_SHUFPS,
23294 IX86_BUILTIN_RCPPS,
23295 IX86_BUILTIN_RCPSS,
23296 IX86_BUILTIN_RSQRTPS,
23297 IX86_BUILTIN_RSQRTPS_NR,
23298 IX86_BUILTIN_RSQRTSS,
23299 IX86_BUILTIN_RSQRTF,
23300 IX86_BUILTIN_SQRTPS,
23301 IX86_BUILTIN_SQRTPS_NR,
23302 IX86_BUILTIN_SQRTSS,
23304 IX86_BUILTIN_UNPCKHPS,
23305 IX86_BUILTIN_UNPCKLPS,
23307 IX86_BUILTIN_ANDPS,
23308 IX86_BUILTIN_ANDNPS,
23310 IX86_BUILTIN_XORPS,
23313 IX86_BUILTIN_LDMXCSR,
23314 IX86_BUILTIN_STMXCSR,
23315 IX86_BUILTIN_SFENCE,
23317 /* 3DNow! Original */
23318 IX86_BUILTIN_FEMMS,
23319 IX86_BUILTIN_PAVGUSB,
23320 IX86_BUILTIN_PF2ID,
23321 IX86_BUILTIN_PFACC,
23322 IX86_BUILTIN_PFADD,
23323 IX86_BUILTIN_PFCMPEQ,
23324 IX86_BUILTIN_PFCMPGE,
23325 IX86_BUILTIN_PFCMPGT,
23326 IX86_BUILTIN_PFMAX,
23327 IX86_BUILTIN_PFMIN,
23328 IX86_BUILTIN_PFMUL,
23329 IX86_BUILTIN_PFRCP,
23330 IX86_BUILTIN_PFRCPIT1,
23331 IX86_BUILTIN_PFRCPIT2,
23332 IX86_BUILTIN_PFRSQIT1,
23333 IX86_BUILTIN_PFRSQRT,
23334 IX86_BUILTIN_PFSUB,
23335 IX86_BUILTIN_PFSUBR,
23336 IX86_BUILTIN_PI2FD,
23337 IX86_BUILTIN_PMULHRW,
23339 /* 3DNow! Athlon Extensions */
23340 IX86_BUILTIN_PF2IW,
23341 IX86_BUILTIN_PFNACC,
23342 IX86_BUILTIN_PFPNACC,
23343 IX86_BUILTIN_PI2FW,
23344 IX86_BUILTIN_PSWAPDSI,
23345 IX86_BUILTIN_PSWAPDSF,
23348 IX86_BUILTIN_ADDPD,
23349 IX86_BUILTIN_ADDSD,
23350 IX86_BUILTIN_DIVPD,
23351 IX86_BUILTIN_DIVSD,
23352 IX86_BUILTIN_MULPD,
23353 IX86_BUILTIN_MULSD,
23354 IX86_BUILTIN_SUBPD,
23355 IX86_BUILTIN_SUBSD,
23357 IX86_BUILTIN_CMPEQPD,
23358 IX86_BUILTIN_CMPLTPD,
23359 IX86_BUILTIN_CMPLEPD,
23360 IX86_BUILTIN_CMPGTPD,
23361 IX86_BUILTIN_CMPGEPD,
23362 IX86_BUILTIN_CMPNEQPD,
23363 IX86_BUILTIN_CMPNLTPD,
23364 IX86_BUILTIN_CMPNLEPD,
23365 IX86_BUILTIN_CMPNGTPD,
23366 IX86_BUILTIN_CMPNGEPD,
23367 IX86_BUILTIN_CMPORDPD,
23368 IX86_BUILTIN_CMPUNORDPD,
23369 IX86_BUILTIN_CMPEQSD,
23370 IX86_BUILTIN_CMPLTSD,
23371 IX86_BUILTIN_CMPLESD,
23372 IX86_BUILTIN_CMPNEQSD,
23373 IX86_BUILTIN_CMPNLTSD,
23374 IX86_BUILTIN_CMPNLESD,
23375 IX86_BUILTIN_CMPORDSD,
23376 IX86_BUILTIN_CMPUNORDSD,
23378 IX86_BUILTIN_COMIEQSD,
23379 IX86_BUILTIN_COMILTSD,
23380 IX86_BUILTIN_COMILESD,
23381 IX86_BUILTIN_COMIGTSD,
23382 IX86_BUILTIN_COMIGESD,
23383 IX86_BUILTIN_COMINEQSD,
23384 IX86_BUILTIN_UCOMIEQSD,
23385 IX86_BUILTIN_UCOMILTSD,
23386 IX86_BUILTIN_UCOMILESD,
23387 IX86_BUILTIN_UCOMIGTSD,
23388 IX86_BUILTIN_UCOMIGESD,
23389 IX86_BUILTIN_UCOMINEQSD,
23391 IX86_BUILTIN_MAXPD,
23392 IX86_BUILTIN_MAXSD,
23393 IX86_BUILTIN_MINPD,
23394 IX86_BUILTIN_MINSD,
23396 IX86_BUILTIN_ANDPD,
23397 IX86_BUILTIN_ANDNPD,
23399 IX86_BUILTIN_XORPD,
23401 IX86_BUILTIN_SQRTPD,
23402 IX86_BUILTIN_SQRTSD,
23404 IX86_BUILTIN_UNPCKHPD,
23405 IX86_BUILTIN_UNPCKLPD,
23407 IX86_BUILTIN_SHUFPD,
23409 IX86_BUILTIN_LOADUPD,
23410 IX86_BUILTIN_STOREUPD,
23411 IX86_BUILTIN_MOVSD,
23413 IX86_BUILTIN_LOADHPD,
23414 IX86_BUILTIN_LOADLPD,
23416 IX86_BUILTIN_CVTDQ2PD,
23417 IX86_BUILTIN_CVTDQ2PS,
23419 IX86_BUILTIN_CVTPD2DQ,
23420 IX86_BUILTIN_CVTPD2PI,
23421 IX86_BUILTIN_CVTPD2PS,
23422 IX86_BUILTIN_CVTTPD2DQ,
23423 IX86_BUILTIN_CVTTPD2PI,
23425 IX86_BUILTIN_CVTPI2PD,
23426 IX86_BUILTIN_CVTSI2SD,
23427 IX86_BUILTIN_CVTSI642SD,
23429 IX86_BUILTIN_CVTSD2SI,
23430 IX86_BUILTIN_CVTSD2SI64,
23431 IX86_BUILTIN_CVTSD2SS,
23432 IX86_BUILTIN_CVTSS2SD,
23433 IX86_BUILTIN_CVTTSD2SI,
23434 IX86_BUILTIN_CVTTSD2SI64,
23436 IX86_BUILTIN_CVTPS2DQ,
23437 IX86_BUILTIN_CVTPS2PD,
23438 IX86_BUILTIN_CVTTPS2DQ,
23440 IX86_BUILTIN_MOVNTI,
23441 IX86_BUILTIN_MOVNTPD,
23442 IX86_BUILTIN_MOVNTDQ,
23444 IX86_BUILTIN_MOVQ128,
23447 IX86_BUILTIN_MASKMOVDQU,
23448 IX86_BUILTIN_MOVMSKPD,
23449 IX86_BUILTIN_PMOVMSKB128,
23451 IX86_BUILTIN_PACKSSWB128,
23452 IX86_BUILTIN_PACKSSDW128,
23453 IX86_BUILTIN_PACKUSWB128,
23455 IX86_BUILTIN_PADDB128,
23456 IX86_BUILTIN_PADDW128,
23457 IX86_BUILTIN_PADDD128,
23458 IX86_BUILTIN_PADDQ128,
23459 IX86_BUILTIN_PADDSB128,
23460 IX86_BUILTIN_PADDSW128,
23461 IX86_BUILTIN_PADDUSB128,
23462 IX86_BUILTIN_PADDUSW128,
23463 IX86_BUILTIN_PSUBB128,
23464 IX86_BUILTIN_PSUBW128,
23465 IX86_BUILTIN_PSUBD128,
23466 IX86_BUILTIN_PSUBQ128,
23467 IX86_BUILTIN_PSUBSB128,
23468 IX86_BUILTIN_PSUBSW128,
23469 IX86_BUILTIN_PSUBUSB128,
23470 IX86_BUILTIN_PSUBUSW128,
23472 IX86_BUILTIN_PAND128,
23473 IX86_BUILTIN_PANDN128,
23474 IX86_BUILTIN_POR128,
23475 IX86_BUILTIN_PXOR128,
23477 IX86_BUILTIN_PAVGB128,
23478 IX86_BUILTIN_PAVGW128,
23480 IX86_BUILTIN_PCMPEQB128,
23481 IX86_BUILTIN_PCMPEQW128,
23482 IX86_BUILTIN_PCMPEQD128,
23483 IX86_BUILTIN_PCMPGTB128,
23484 IX86_BUILTIN_PCMPGTW128,
23485 IX86_BUILTIN_PCMPGTD128,
23487 IX86_BUILTIN_PMADDWD128,
23489 IX86_BUILTIN_PMAXSW128,
23490 IX86_BUILTIN_PMAXUB128,
23491 IX86_BUILTIN_PMINSW128,
23492 IX86_BUILTIN_PMINUB128,
23494 IX86_BUILTIN_PMULUDQ,
23495 IX86_BUILTIN_PMULUDQ128,
23496 IX86_BUILTIN_PMULHUW128,
23497 IX86_BUILTIN_PMULHW128,
23498 IX86_BUILTIN_PMULLW128,
23500 IX86_BUILTIN_PSADBW128,
23501 IX86_BUILTIN_PSHUFHW,
23502 IX86_BUILTIN_PSHUFLW,
23503 IX86_BUILTIN_PSHUFD,
23505 IX86_BUILTIN_PSLLDQI128,
23506 IX86_BUILTIN_PSLLWI128,
23507 IX86_BUILTIN_PSLLDI128,
23508 IX86_BUILTIN_PSLLQI128,
23509 IX86_BUILTIN_PSRAWI128,
23510 IX86_BUILTIN_PSRADI128,
23511 IX86_BUILTIN_PSRLDQI128,
23512 IX86_BUILTIN_PSRLWI128,
23513 IX86_BUILTIN_PSRLDI128,
23514 IX86_BUILTIN_PSRLQI128,
23516 IX86_BUILTIN_PSLLDQ128,
23517 IX86_BUILTIN_PSLLW128,
23518 IX86_BUILTIN_PSLLD128,
23519 IX86_BUILTIN_PSLLQ128,
23520 IX86_BUILTIN_PSRAW128,
23521 IX86_BUILTIN_PSRAD128,
23522 IX86_BUILTIN_PSRLW128,
23523 IX86_BUILTIN_PSRLD128,
23524 IX86_BUILTIN_PSRLQ128,
23526 IX86_BUILTIN_PUNPCKHBW128,
23527 IX86_BUILTIN_PUNPCKHWD128,
23528 IX86_BUILTIN_PUNPCKHDQ128,
23529 IX86_BUILTIN_PUNPCKHQDQ128,
23530 IX86_BUILTIN_PUNPCKLBW128,
23531 IX86_BUILTIN_PUNPCKLWD128,
23532 IX86_BUILTIN_PUNPCKLDQ128,
23533 IX86_BUILTIN_PUNPCKLQDQ128,
23535 IX86_BUILTIN_CLFLUSH,
23536 IX86_BUILTIN_MFENCE,
23537 IX86_BUILTIN_LFENCE,
23538 IX86_BUILTIN_PAUSE,
23540 IX86_BUILTIN_BSRSI,
23541 IX86_BUILTIN_BSRDI,
23542 IX86_BUILTIN_RDPMC,
23543 IX86_BUILTIN_RDTSC,
23544 IX86_BUILTIN_RDTSCP,
23545 IX86_BUILTIN_ROLQI,
23546 IX86_BUILTIN_ROLHI,
23547 IX86_BUILTIN_RORQI,
23548 IX86_BUILTIN_RORHI,
23551 IX86_BUILTIN_ADDSUBPS,
23552 IX86_BUILTIN_HADDPS,
23553 IX86_BUILTIN_HSUBPS,
23554 IX86_BUILTIN_MOVSHDUP,
23555 IX86_BUILTIN_MOVSLDUP,
23556 IX86_BUILTIN_ADDSUBPD,
23557 IX86_BUILTIN_HADDPD,
23558 IX86_BUILTIN_HSUBPD,
23559 IX86_BUILTIN_LDDQU,
23561 IX86_BUILTIN_MONITOR,
23562 IX86_BUILTIN_MWAIT,
23565 IX86_BUILTIN_PHADDW,
23566 IX86_BUILTIN_PHADDD,
23567 IX86_BUILTIN_PHADDSW,
23568 IX86_BUILTIN_PHSUBW,
23569 IX86_BUILTIN_PHSUBD,
23570 IX86_BUILTIN_PHSUBSW,
23571 IX86_BUILTIN_PMADDUBSW,
23572 IX86_BUILTIN_PMULHRSW,
23573 IX86_BUILTIN_PSHUFB,
23574 IX86_BUILTIN_PSIGNB,
23575 IX86_BUILTIN_PSIGNW,
23576 IX86_BUILTIN_PSIGND,
23577 IX86_BUILTIN_PALIGNR,
23578 IX86_BUILTIN_PABSB,
23579 IX86_BUILTIN_PABSW,
23580 IX86_BUILTIN_PABSD,
23582 IX86_BUILTIN_PHADDW128,
23583 IX86_BUILTIN_PHADDD128,
23584 IX86_BUILTIN_PHADDSW128,
23585 IX86_BUILTIN_PHSUBW128,
23586 IX86_BUILTIN_PHSUBD128,
23587 IX86_BUILTIN_PHSUBSW128,
23588 IX86_BUILTIN_PMADDUBSW128,
23589 IX86_BUILTIN_PMULHRSW128,
23590 IX86_BUILTIN_PSHUFB128,
23591 IX86_BUILTIN_PSIGNB128,
23592 IX86_BUILTIN_PSIGNW128,
23593 IX86_BUILTIN_PSIGND128,
23594 IX86_BUILTIN_PALIGNR128,
23595 IX86_BUILTIN_PABSB128,
23596 IX86_BUILTIN_PABSW128,
23597 IX86_BUILTIN_PABSD128,
23599 /* AMDFAM10 - SSE4A New Instructions. */
23600 IX86_BUILTIN_MOVNTSD,
23601 IX86_BUILTIN_MOVNTSS,
23602 IX86_BUILTIN_EXTRQI,
23603 IX86_BUILTIN_EXTRQ,
23604 IX86_BUILTIN_INSERTQI,
23605 IX86_BUILTIN_INSERTQ,
23608 IX86_BUILTIN_BLENDPD,
23609 IX86_BUILTIN_BLENDPS,
23610 IX86_BUILTIN_BLENDVPD,
23611 IX86_BUILTIN_BLENDVPS,
23612 IX86_BUILTIN_PBLENDVB128,
23613 IX86_BUILTIN_PBLENDW128,
23618 IX86_BUILTIN_INSERTPS128,
23620 IX86_BUILTIN_MOVNTDQA,
23621 IX86_BUILTIN_MPSADBW128,
23622 IX86_BUILTIN_PACKUSDW128,
23623 IX86_BUILTIN_PCMPEQQ,
23624 IX86_BUILTIN_PHMINPOSUW128,
23626 IX86_BUILTIN_PMAXSB128,
23627 IX86_BUILTIN_PMAXSD128,
23628 IX86_BUILTIN_PMAXUD128,
23629 IX86_BUILTIN_PMAXUW128,
23631 IX86_BUILTIN_PMINSB128,
23632 IX86_BUILTIN_PMINSD128,
23633 IX86_BUILTIN_PMINUD128,
23634 IX86_BUILTIN_PMINUW128,
23636 IX86_BUILTIN_PMOVSXBW128,
23637 IX86_BUILTIN_PMOVSXBD128,
23638 IX86_BUILTIN_PMOVSXBQ128,
23639 IX86_BUILTIN_PMOVSXWD128,
23640 IX86_BUILTIN_PMOVSXWQ128,
23641 IX86_BUILTIN_PMOVSXDQ128,
23643 IX86_BUILTIN_PMOVZXBW128,
23644 IX86_BUILTIN_PMOVZXBD128,
23645 IX86_BUILTIN_PMOVZXBQ128,
23646 IX86_BUILTIN_PMOVZXWD128,
23647 IX86_BUILTIN_PMOVZXWQ128,
23648 IX86_BUILTIN_PMOVZXDQ128,
23650 IX86_BUILTIN_PMULDQ128,
23651 IX86_BUILTIN_PMULLD128,
23653 IX86_BUILTIN_ROUNDPD,
23654 IX86_BUILTIN_ROUNDPS,
23655 IX86_BUILTIN_ROUNDSD,
23656 IX86_BUILTIN_ROUNDSS,
23658 IX86_BUILTIN_FLOORPD,
23659 IX86_BUILTIN_CEILPD,
23660 IX86_BUILTIN_TRUNCPD,
23661 IX86_BUILTIN_RINTPD,
23662 IX86_BUILTIN_FLOORPS,
23663 IX86_BUILTIN_CEILPS,
23664 IX86_BUILTIN_TRUNCPS,
23665 IX86_BUILTIN_RINTPS,
23667 IX86_BUILTIN_PTESTZ,
23668 IX86_BUILTIN_PTESTC,
23669 IX86_BUILTIN_PTESTNZC,
23671 IX86_BUILTIN_VEC_INIT_V2SI,
23672 IX86_BUILTIN_VEC_INIT_V4HI,
23673 IX86_BUILTIN_VEC_INIT_V8QI,
23674 IX86_BUILTIN_VEC_EXT_V2DF,
23675 IX86_BUILTIN_VEC_EXT_V2DI,
23676 IX86_BUILTIN_VEC_EXT_V4SF,
23677 IX86_BUILTIN_VEC_EXT_V4SI,
23678 IX86_BUILTIN_VEC_EXT_V8HI,
23679 IX86_BUILTIN_VEC_EXT_V2SI,
23680 IX86_BUILTIN_VEC_EXT_V4HI,
23681 IX86_BUILTIN_VEC_EXT_V16QI,
23682 IX86_BUILTIN_VEC_SET_V2DI,
23683 IX86_BUILTIN_VEC_SET_V4SF,
23684 IX86_BUILTIN_VEC_SET_V4SI,
23685 IX86_BUILTIN_VEC_SET_V8HI,
23686 IX86_BUILTIN_VEC_SET_V4HI,
23687 IX86_BUILTIN_VEC_SET_V16QI,
23689 IX86_BUILTIN_VEC_PACK_SFIX,
23692 IX86_BUILTIN_CRC32QI,
23693 IX86_BUILTIN_CRC32HI,
23694 IX86_BUILTIN_CRC32SI,
23695 IX86_BUILTIN_CRC32DI,
23697 IX86_BUILTIN_PCMPESTRI128,
23698 IX86_BUILTIN_PCMPESTRM128,
23699 IX86_BUILTIN_PCMPESTRA128,
23700 IX86_BUILTIN_PCMPESTRC128,
23701 IX86_BUILTIN_PCMPESTRO128,
23702 IX86_BUILTIN_PCMPESTRS128,
23703 IX86_BUILTIN_PCMPESTRZ128,
23704 IX86_BUILTIN_PCMPISTRI128,
23705 IX86_BUILTIN_PCMPISTRM128,
23706 IX86_BUILTIN_PCMPISTRA128,
23707 IX86_BUILTIN_PCMPISTRC128,
23708 IX86_BUILTIN_PCMPISTRO128,
23709 IX86_BUILTIN_PCMPISTRS128,
23710 IX86_BUILTIN_PCMPISTRZ128,
23712 IX86_BUILTIN_PCMPGTQ,
23714 /* AES instructions */
23715 IX86_BUILTIN_AESENC128,
23716 IX86_BUILTIN_AESENCLAST128,
23717 IX86_BUILTIN_AESDEC128,
23718 IX86_BUILTIN_AESDECLAST128,
23719 IX86_BUILTIN_AESIMC128,
23720 IX86_BUILTIN_AESKEYGENASSIST128,
23722 /* PCLMUL instruction */
23723 IX86_BUILTIN_PCLMULQDQ128,
23726 IX86_BUILTIN_ADDPD256,
23727 IX86_BUILTIN_ADDPS256,
23728 IX86_BUILTIN_ADDSUBPD256,
23729 IX86_BUILTIN_ADDSUBPS256,
23730 IX86_BUILTIN_ANDPD256,
23731 IX86_BUILTIN_ANDPS256,
23732 IX86_BUILTIN_ANDNPD256,
23733 IX86_BUILTIN_ANDNPS256,
23734 IX86_BUILTIN_BLENDPD256,
23735 IX86_BUILTIN_BLENDPS256,
23736 IX86_BUILTIN_BLENDVPD256,
23737 IX86_BUILTIN_BLENDVPS256,
23738 IX86_BUILTIN_DIVPD256,
23739 IX86_BUILTIN_DIVPS256,
23740 IX86_BUILTIN_DPPS256,
23741 IX86_BUILTIN_HADDPD256,
23742 IX86_BUILTIN_HADDPS256,
23743 IX86_BUILTIN_HSUBPD256,
23744 IX86_BUILTIN_HSUBPS256,
23745 IX86_BUILTIN_MAXPD256,
23746 IX86_BUILTIN_MAXPS256,
23747 IX86_BUILTIN_MINPD256,
23748 IX86_BUILTIN_MINPS256,
23749 IX86_BUILTIN_MULPD256,
23750 IX86_BUILTIN_MULPS256,
23751 IX86_BUILTIN_ORPD256,
23752 IX86_BUILTIN_ORPS256,
23753 IX86_BUILTIN_SHUFPD256,
23754 IX86_BUILTIN_SHUFPS256,
23755 IX86_BUILTIN_SUBPD256,
23756 IX86_BUILTIN_SUBPS256,
23757 IX86_BUILTIN_XORPD256,
23758 IX86_BUILTIN_XORPS256,
23759 IX86_BUILTIN_CMPSD,
23760 IX86_BUILTIN_CMPSS,
23761 IX86_BUILTIN_CMPPD,
23762 IX86_BUILTIN_CMPPS,
23763 IX86_BUILTIN_CMPPD256,
23764 IX86_BUILTIN_CMPPS256,
23765 IX86_BUILTIN_CVTDQ2PD256,
23766 IX86_BUILTIN_CVTDQ2PS256,
23767 IX86_BUILTIN_CVTPD2PS256,
23768 IX86_BUILTIN_CVTPS2DQ256,
23769 IX86_BUILTIN_CVTPS2PD256,
23770 IX86_BUILTIN_CVTTPD2DQ256,
23771 IX86_BUILTIN_CVTPD2DQ256,
23772 IX86_BUILTIN_CVTTPS2DQ256,
23773 IX86_BUILTIN_EXTRACTF128PD256,
23774 IX86_BUILTIN_EXTRACTF128PS256,
23775 IX86_BUILTIN_EXTRACTF128SI256,
23776 IX86_BUILTIN_VZEROALL,
23777 IX86_BUILTIN_VZEROUPPER,
23778 IX86_BUILTIN_VPERMILVARPD,
23779 IX86_BUILTIN_VPERMILVARPS,
23780 IX86_BUILTIN_VPERMILVARPD256,
23781 IX86_BUILTIN_VPERMILVARPS256,
23782 IX86_BUILTIN_VPERMILPD,
23783 IX86_BUILTIN_VPERMILPS,
23784 IX86_BUILTIN_VPERMILPD256,
23785 IX86_BUILTIN_VPERMILPS256,
23786 IX86_BUILTIN_VPERMIL2PD,
23787 IX86_BUILTIN_VPERMIL2PS,
23788 IX86_BUILTIN_VPERMIL2PD256,
23789 IX86_BUILTIN_VPERMIL2PS256,
23790 IX86_BUILTIN_VPERM2F128PD256,
23791 IX86_BUILTIN_VPERM2F128PS256,
23792 IX86_BUILTIN_VPERM2F128SI256,
23793 IX86_BUILTIN_VBROADCASTSS,
23794 IX86_BUILTIN_VBROADCASTSD256,
23795 IX86_BUILTIN_VBROADCASTSS256,
23796 IX86_BUILTIN_VBROADCASTPD256,
23797 IX86_BUILTIN_VBROADCASTPS256,
23798 IX86_BUILTIN_VINSERTF128PD256,
23799 IX86_BUILTIN_VINSERTF128PS256,
23800 IX86_BUILTIN_VINSERTF128SI256,
23801 IX86_BUILTIN_LOADUPD256,
23802 IX86_BUILTIN_LOADUPS256,
23803 IX86_BUILTIN_STOREUPD256,
23804 IX86_BUILTIN_STOREUPS256,
23805 IX86_BUILTIN_LDDQU256,
23806 IX86_BUILTIN_MOVNTDQ256,
23807 IX86_BUILTIN_MOVNTPD256,
23808 IX86_BUILTIN_MOVNTPS256,
23809 IX86_BUILTIN_LOADDQU256,
23810 IX86_BUILTIN_STOREDQU256,
23811 IX86_BUILTIN_MASKLOADPD,
23812 IX86_BUILTIN_MASKLOADPS,
23813 IX86_BUILTIN_MASKSTOREPD,
23814 IX86_BUILTIN_MASKSTOREPS,
23815 IX86_BUILTIN_MASKLOADPD256,
23816 IX86_BUILTIN_MASKLOADPS256,
23817 IX86_BUILTIN_MASKSTOREPD256,
23818 IX86_BUILTIN_MASKSTOREPS256,
23819 IX86_BUILTIN_MOVSHDUP256,
23820 IX86_BUILTIN_MOVSLDUP256,
23821 IX86_BUILTIN_MOVDDUP256,
23823 IX86_BUILTIN_SQRTPD256,
23824 IX86_BUILTIN_SQRTPS256,
23825 IX86_BUILTIN_SQRTPS_NR256,
23826 IX86_BUILTIN_RSQRTPS256,
23827 IX86_BUILTIN_RSQRTPS_NR256,
23829 IX86_BUILTIN_RCPPS256,
23831 IX86_BUILTIN_ROUNDPD256,
23832 IX86_BUILTIN_ROUNDPS256,
23834 IX86_BUILTIN_FLOORPD256,
23835 IX86_BUILTIN_CEILPD256,
23836 IX86_BUILTIN_TRUNCPD256,
23837 IX86_BUILTIN_RINTPD256,
23838 IX86_BUILTIN_FLOORPS256,
23839 IX86_BUILTIN_CEILPS256,
23840 IX86_BUILTIN_TRUNCPS256,
23841 IX86_BUILTIN_RINTPS256,
23843 IX86_BUILTIN_UNPCKHPD256,
23844 IX86_BUILTIN_UNPCKLPD256,
23845 IX86_BUILTIN_UNPCKHPS256,
23846 IX86_BUILTIN_UNPCKLPS256,
23848 IX86_BUILTIN_SI256_SI,
23849 IX86_BUILTIN_PS256_PS,
23850 IX86_BUILTIN_PD256_PD,
23851 IX86_BUILTIN_SI_SI256,
23852 IX86_BUILTIN_PS_PS256,
23853 IX86_BUILTIN_PD_PD256,
23855 IX86_BUILTIN_VTESTZPD,
23856 IX86_BUILTIN_VTESTCPD,
23857 IX86_BUILTIN_VTESTNZCPD,
23858 IX86_BUILTIN_VTESTZPS,
23859 IX86_BUILTIN_VTESTCPS,
23860 IX86_BUILTIN_VTESTNZCPS,
23861 IX86_BUILTIN_VTESTZPD256,
23862 IX86_BUILTIN_VTESTCPD256,
23863 IX86_BUILTIN_VTESTNZCPD256,
23864 IX86_BUILTIN_VTESTZPS256,
23865 IX86_BUILTIN_VTESTCPS256,
23866 IX86_BUILTIN_VTESTNZCPS256,
23867 IX86_BUILTIN_PTESTZ256,
23868 IX86_BUILTIN_PTESTC256,
23869 IX86_BUILTIN_PTESTNZC256,
23871 IX86_BUILTIN_MOVMSKPD256,
23872 IX86_BUILTIN_MOVMSKPS256,
23874 /* TFmode support builtins. */
23876 IX86_BUILTIN_HUGE_VALQ,
23877 IX86_BUILTIN_FABSQ,
23878 IX86_BUILTIN_COPYSIGNQ,
23880 /* Vectorizer support builtins. */
23881 IX86_BUILTIN_CPYSGNPS,
23882 IX86_BUILTIN_CPYSGNPD,
23883 IX86_BUILTIN_CPYSGNPS256,
23884 IX86_BUILTIN_CPYSGNPD256,
23886 IX86_BUILTIN_CVTUDQ2PS,
23888 IX86_BUILTIN_VEC_PERM_V2DF,
23889 IX86_BUILTIN_VEC_PERM_V4SF,
23890 IX86_BUILTIN_VEC_PERM_V2DI,
23891 IX86_BUILTIN_VEC_PERM_V4SI,
23892 IX86_BUILTIN_VEC_PERM_V8HI,
23893 IX86_BUILTIN_VEC_PERM_V16QI,
23894 IX86_BUILTIN_VEC_PERM_V2DI_U,
23895 IX86_BUILTIN_VEC_PERM_V4SI_U,
23896 IX86_BUILTIN_VEC_PERM_V8HI_U,
23897 IX86_BUILTIN_VEC_PERM_V16QI_U,
23898 IX86_BUILTIN_VEC_PERM_V4DF,
23899 IX86_BUILTIN_VEC_PERM_V8SF,
23901 /* FMA4 and XOP instructions. */
23902 IX86_BUILTIN_VFMADDSS,
23903 IX86_BUILTIN_VFMADDSD,
23904 IX86_BUILTIN_VFMADDPS,
23905 IX86_BUILTIN_VFMADDPD,
23906 IX86_BUILTIN_VFMADDPS256,
23907 IX86_BUILTIN_VFMADDPD256,
23908 IX86_BUILTIN_VFMADDSUBPS,
23909 IX86_BUILTIN_VFMADDSUBPD,
23910 IX86_BUILTIN_VFMADDSUBPS256,
23911 IX86_BUILTIN_VFMADDSUBPD256,
23913 IX86_BUILTIN_VPCMOV,
23914 IX86_BUILTIN_VPCMOV_V2DI,
23915 IX86_BUILTIN_VPCMOV_V4SI,
23916 IX86_BUILTIN_VPCMOV_V8HI,
23917 IX86_BUILTIN_VPCMOV_V16QI,
23918 IX86_BUILTIN_VPCMOV_V4SF,
23919 IX86_BUILTIN_VPCMOV_V2DF,
23920 IX86_BUILTIN_VPCMOV256,
23921 IX86_BUILTIN_VPCMOV_V4DI256,
23922 IX86_BUILTIN_VPCMOV_V8SI256,
23923 IX86_BUILTIN_VPCMOV_V16HI256,
23924 IX86_BUILTIN_VPCMOV_V32QI256,
23925 IX86_BUILTIN_VPCMOV_V8SF256,
23926 IX86_BUILTIN_VPCMOV_V4DF256,
23928 IX86_BUILTIN_VPPERM,
23930 IX86_BUILTIN_VPMACSSWW,
23931 IX86_BUILTIN_VPMACSWW,
23932 IX86_BUILTIN_VPMACSSWD,
23933 IX86_BUILTIN_VPMACSWD,
23934 IX86_BUILTIN_VPMACSSDD,
23935 IX86_BUILTIN_VPMACSDD,
23936 IX86_BUILTIN_VPMACSSDQL,
23937 IX86_BUILTIN_VPMACSSDQH,
23938 IX86_BUILTIN_VPMACSDQL,
23939 IX86_BUILTIN_VPMACSDQH,
23940 IX86_BUILTIN_VPMADCSSWD,
23941 IX86_BUILTIN_VPMADCSWD,
23943 IX86_BUILTIN_VPHADDBW,
23944 IX86_BUILTIN_VPHADDBD,
23945 IX86_BUILTIN_VPHADDBQ,
23946 IX86_BUILTIN_VPHADDWD,
23947 IX86_BUILTIN_VPHADDWQ,
23948 IX86_BUILTIN_VPHADDDQ,
23949 IX86_BUILTIN_VPHADDUBW,
23950 IX86_BUILTIN_VPHADDUBD,
23951 IX86_BUILTIN_VPHADDUBQ,
23952 IX86_BUILTIN_VPHADDUWD,
23953 IX86_BUILTIN_VPHADDUWQ,
23954 IX86_BUILTIN_VPHADDUDQ,
23955 IX86_BUILTIN_VPHSUBBW,
23956 IX86_BUILTIN_VPHSUBWD,
23957 IX86_BUILTIN_VPHSUBDQ,
23959 IX86_BUILTIN_VPROTB,
23960 IX86_BUILTIN_VPROTW,
23961 IX86_BUILTIN_VPROTD,
23962 IX86_BUILTIN_VPROTQ,
23963 IX86_BUILTIN_VPROTB_IMM,
23964 IX86_BUILTIN_VPROTW_IMM,
23965 IX86_BUILTIN_VPROTD_IMM,
23966 IX86_BUILTIN_VPROTQ_IMM,
23968 IX86_BUILTIN_VPSHLB,
23969 IX86_BUILTIN_VPSHLW,
23970 IX86_BUILTIN_VPSHLD,
23971 IX86_BUILTIN_VPSHLQ,
23972 IX86_BUILTIN_VPSHAB,
23973 IX86_BUILTIN_VPSHAW,
23974 IX86_BUILTIN_VPSHAD,
23975 IX86_BUILTIN_VPSHAQ,
23977 IX86_BUILTIN_VFRCZSS,
23978 IX86_BUILTIN_VFRCZSD,
23979 IX86_BUILTIN_VFRCZPS,
23980 IX86_BUILTIN_VFRCZPD,
23981 IX86_BUILTIN_VFRCZPS256,
23982 IX86_BUILTIN_VFRCZPD256,
23984 IX86_BUILTIN_VPCOMEQUB,
23985 IX86_BUILTIN_VPCOMNEUB,
23986 IX86_BUILTIN_VPCOMLTUB,
23987 IX86_BUILTIN_VPCOMLEUB,
23988 IX86_BUILTIN_VPCOMGTUB,
23989 IX86_BUILTIN_VPCOMGEUB,
23990 IX86_BUILTIN_VPCOMFALSEUB,
23991 IX86_BUILTIN_VPCOMTRUEUB,
23993 IX86_BUILTIN_VPCOMEQUW,
23994 IX86_BUILTIN_VPCOMNEUW,
23995 IX86_BUILTIN_VPCOMLTUW,
23996 IX86_BUILTIN_VPCOMLEUW,
23997 IX86_BUILTIN_VPCOMGTUW,
23998 IX86_BUILTIN_VPCOMGEUW,
23999 IX86_BUILTIN_VPCOMFALSEUW,
24000 IX86_BUILTIN_VPCOMTRUEUW,
24002 IX86_BUILTIN_VPCOMEQUD,
24003 IX86_BUILTIN_VPCOMNEUD,
24004 IX86_BUILTIN_VPCOMLTUD,
24005 IX86_BUILTIN_VPCOMLEUD,
24006 IX86_BUILTIN_VPCOMGTUD,
24007 IX86_BUILTIN_VPCOMGEUD,
24008 IX86_BUILTIN_VPCOMFALSEUD,
24009 IX86_BUILTIN_VPCOMTRUEUD,
24011 IX86_BUILTIN_VPCOMEQUQ,
24012 IX86_BUILTIN_VPCOMNEUQ,
24013 IX86_BUILTIN_VPCOMLTUQ,
24014 IX86_BUILTIN_VPCOMLEUQ,
24015 IX86_BUILTIN_VPCOMGTUQ,
24016 IX86_BUILTIN_VPCOMGEUQ,
24017 IX86_BUILTIN_VPCOMFALSEUQ,
24018 IX86_BUILTIN_VPCOMTRUEUQ,
24020 IX86_BUILTIN_VPCOMEQB,
24021 IX86_BUILTIN_VPCOMNEB,
24022 IX86_BUILTIN_VPCOMLTB,
24023 IX86_BUILTIN_VPCOMLEB,
24024 IX86_BUILTIN_VPCOMGTB,
24025 IX86_BUILTIN_VPCOMGEB,
24026 IX86_BUILTIN_VPCOMFALSEB,
24027 IX86_BUILTIN_VPCOMTRUEB,
24029 IX86_BUILTIN_VPCOMEQW,
24030 IX86_BUILTIN_VPCOMNEW,
24031 IX86_BUILTIN_VPCOMLTW,
24032 IX86_BUILTIN_VPCOMLEW,
24033 IX86_BUILTIN_VPCOMGTW,
24034 IX86_BUILTIN_VPCOMGEW,
24035 IX86_BUILTIN_VPCOMFALSEW,
24036 IX86_BUILTIN_VPCOMTRUEW,
24038 IX86_BUILTIN_VPCOMEQD,
24039 IX86_BUILTIN_VPCOMNED,
24040 IX86_BUILTIN_VPCOMLTD,
24041 IX86_BUILTIN_VPCOMLED,
24042 IX86_BUILTIN_VPCOMGTD,
24043 IX86_BUILTIN_VPCOMGED,
24044 IX86_BUILTIN_VPCOMFALSED,
24045 IX86_BUILTIN_VPCOMTRUED,
24047 IX86_BUILTIN_VPCOMEQQ,
24048 IX86_BUILTIN_VPCOMNEQ,
24049 IX86_BUILTIN_VPCOMLTQ,
24050 IX86_BUILTIN_VPCOMLEQ,
24051 IX86_BUILTIN_VPCOMGTQ,
24052 IX86_BUILTIN_VPCOMGEQ,
24053 IX86_BUILTIN_VPCOMFALSEQ,
24054 IX86_BUILTIN_VPCOMTRUEQ,
24056 /* LWP instructions. */
24057 IX86_BUILTIN_LLWPCB,
24058 IX86_BUILTIN_SLWPCB,
24059 IX86_BUILTIN_LWPVAL32,
24060 IX86_BUILTIN_LWPVAL64,
24061 IX86_BUILTIN_LWPINS32,
24062 IX86_BUILTIN_LWPINS64,
24066 /* BMI instructions. */
24067 IX86_BUILTIN_BEXTR32,
24068 IX86_BUILTIN_BEXTR64,
24071 /* TBM instructions. */
24072 IX86_BUILTIN_BEXTRI32,
24073 IX86_BUILTIN_BEXTRI64,
24076 /* FSGSBASE instructions. */
24077 IX86_BUILTIN_RDFSBASE32,
24078 IX86_BUILTIN_RDFSBASE64,
24079 IX86_BUILTIN_RDGSBASE32,
24080 IX86_BUILTIN_RDGSBASE64,
24081 IX86_BUILTIN_WRFSBASE32,
24082 IX86_BUILTIN_WRFSBASE64,
24083 IX86_BUILTIN_WRGSBASE32,
24084 IX86_BUILTIN_WRGSBASE64,
24086 /* RDRND instructions. */
24087 IX86_BUILTIN_RDRAND16_STEP,
24088 IX86_BUILTIN_RDRAND32_STEP,
24089 IX86_BUILTIN_RDRAND64_STEP,
24091 /* F16C instructions. */
24092 IX86_BUILTIN_CVTPH2PS,
24093 IX86_BUILTIN_CVTPH2PS256,
24094 IX86_BUILTIN_CVTPS2PH,
24095 IX86_BUILTIN_CVTPS2PH256,
24097 /* CFString built-in for darwin */
24098 IX86_BUILTIN_CFSTRING,
24103 /* Table for the ix86 builtin decls. */
24104 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
24106 /* Table of all of the builtin functions that are possible with different ISAs
24107 but are waiting to be built until a function is declared to use that ISA. */
24109 struct builtin_isa {
24110 const char *name; /* function name */
24111 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
24112 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
24113 bool const_p; /* true if the declaration is constant */
24114 bool set_and_not_built_p;
24117 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
24120 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
24121 of which isa_flags to use in the ix86_builtins_isa array. Store the
24122 function decl in the ix86_builtins array. Return the function decl,
24123 or NULL_TREE if the builtin was not added.
24125 If the front end has a special hook for builtin functions, delay adding
24126 builtin functions that aren't in the current ISA until the ISA is changed
24127 with function specific optimization. Doing so can save about 300K for the
24128 default compiler. When the builtin is expanded, check at that time whether it is valid.
24131 If the front end doesn't have a special hook, record all builtins, even
24132 those that aren't in the current ISA, in case the user uses
24133 function specific options for a different ISA, so that we don't get scope
24134 errors if a builtin is added in the middle of a function scope. */
24137 def_builtin (HOST_WIDE_INT mask, const char *name,
24138 enum ix86_builtin_func_type tcode,
24139 enum ix86_builtins code)
24141 tree decl = NULL_TREE;
24143 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
24145 ix86_builtins_isa[(int) code].isa = mask;
24147 mask &= ~OPTION_MASK_ISA_64BIT;
24149 || (mask & ix86_isa_flags) != 0
24150 || (lang_hooks.builtin_function
24151 == lang_hooks.builtin_function_ext_scope))
24154 tree type = ix86_get_builtin_func_type (tcode);
24155 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24157 ix86_builtins[(int) code] = decl;
24158 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24162 ix86_builtins[(int) code] = NULL_TREE;
24163 ix86_builtins_isa[(int) code].tcode = tcode;
24164 ix86_builtins_isa[(int) code].name = name;
24165 ix86_builtins_isa[(int) code].const_p = false;
24166 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
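
/* Usage sketch (hypothetical call site; the real registrations are driven
   from the ix86_init_*_builtins routines and the bdesc_* tables below):

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence",
                  VOID_FTYPE_VOID, IX86_BUILTIN_LFENCE);

   With SSE2 already in ix86_isa_flags this builds the decl at once;
   otherwise the request is only recorded in ix86_builtins_isa.  */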
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
                   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
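
/* Editorial note: TREE_READONLY on a function decl corresponds to the
   "const" attribute, so the middle end may CSE or delete calls to builtins
   registered this way (the pure value computations in the tables below).  */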
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
          && ix86_builtins_isa[i].set_and_not_built_p)
        {
          tree decl, type;

          /* Don't define the builtin again.  */
          ix86_builtins_isa[i].set_and_not_built_p = false;

          type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
          decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
                                                 type, i, BUILT_IN_MD, NULL,
                                                 NULL_TREE);

          ix86_builtins[i] = decl;
          if (ix86_builtins_isa[i].const_p)
            TREE_READONLY (decl) = 1;
        }
    }
}
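
/* Editorial note: this runs when isa_flags grow after the builtins were
   first initialized, e.g. while processing __attribute__((target("...")))
   options, so deferred builtins come into scope exactly when they become
   usable.  */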
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
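
/* How to read an entry (editorial note): MASK is the OPTION_MASK_ISA_* set
   that must be enabled, ICODE the insn pattern used for expansion, NAME the
   user-visible __builtin_ia32_* symbol, CODE its IX86_BUILTIN_* enumerator,
   COMPARISON the rtx comparison code (or UNKNOWN), and FLAG holds either
   BUILTIN_DESC_SWAP_OPERANDS or, in most of the tables below, the
   ix86_builtin_func_type describing the prototype.  */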
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
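
/* Editorial note: for the pcmp[ei]str{a,c,o,s,z} variants above, the FLAG
   slot carries the CC mode (CCAmode, CCCmode, ...) of the flags bit the
   builtin extracts, rather than a function type.  */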
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24763 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
24765 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
24766 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

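  /* SSE2 MMX */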
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

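  /* SSE3 */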
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

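  /* SSSE3 */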
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

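  /* SSE4.1 */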
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },

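  /* SSE4.2 */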
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

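  /* SSE4A */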
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

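  /* AES */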
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

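  /* PCLMUL */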
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

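  /* AVX */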
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

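  /* LZCNT */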
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

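  /* BMI */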
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

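  /* TBM */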
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM | OPTION_MASK_ISA_64BIT, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

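  /* F16C */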
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di,        "__builtin_ia32_vpcmov",        IX86_BUILTIN_VPCMOV,	 UNKNOWN,      (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di,        "__builtin_ia32_vpcmov_v2di",   IX86_BUILTIN_VPCMOV_V2DI,  UNKNOWN,      (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si,        "__builtin_ia32_vpcmov_v4si",   IX86_BUILTIN_VPCMOV_V4SI,  UNKNOWN,      (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi,        "__builtin_ia32_vpcmov_v8hi",   IX86_BUILTIN_VPCMOV_V8HI,  UNKNOWN,      (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi,       "__builtin_ia32_vpcmov_v16qi",  IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN,      (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df,        "__builtin_ia32_vpcmov_v2df",   IX86_BUILTIN_VPCMOV_V2DF,  UNKNOWN,      (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf,        "__builtin_ia32_vpcmov_v4sf",   IX86_BUILTIN_VPCMOV_V4SF,  UNKNOWN,      (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256,     "__builtin_ia32_vpcmov256",       IX86_BUILTIN_VPCMOV256,       UNKNOWN,      (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256,     "__builtin_ia32_vpcmov_v4di256",  IX86_BUILTIN_VPCMOV_V4DI256,  UNKNOWN,      (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256,     "__builtin_ia32_vpcmov_v8si256",  IX86_BUILTIN_VPCMOV_V8SI256,  UNKNOWN,      (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256,    "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN,      (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256,    "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN,      (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256,     "__builtin_ia32_vpcmov_v4df256",  IX86_BUILTIN_VPCMOV_V4DF256,  UNKNOWN,      (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256,     "__builtin_ia32_vpcmov_v8sf256",  IX86_BUILTIN_VPCMOV_V8SF256,  UNKNOWN,      (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm,             "__builtin_ia32_vpperm",        IX86_BUILTIN_VPPERM,      UNKNOWN,      (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww,          "__builtin_ia32_vpmacssww",     IX86_BUILTIN_VPMACSSWW,   UNKNOWN,      (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww,           "__builtin_ia32_vpmacsww",      IX86_BUILTIN_VPMACSWW,    UNKNOWN,      (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd,          "__builtin_ia32_vpmacsswd",     IX86_BUILTIN_VPMACSSWD,   UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd,           "__builtin_ia32_vpmacswd",      IX86_BUILTIN_VPMACSWD,    UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd,          "__builtin_ia32_vpmacssdd",     IX86_BUILTIN_VPMACSSDD,   UNKNOWN,      (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd,           "__builtin_ia32_vpmacsdd",      IX86_BUILTIN_VPMACSDD,    UNKNOWN,      (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql,         "__builtin_ia32_vpmacssdql",    IX86_BUILTIN_VPMACSSDQL,  UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh,         "__builtin_ia32_vpmacssdqh",    IX86_BUILTIN_VPMACSSDQH,  UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql,          "__builtin_ia32_vpmacsdql",     IX86_BUILTIN_VPMACSDQL,   UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh,          "__builtin_ia32_vpmacsdqh",     IX86_BUILTIN_VPMACSDQH,   UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd,         "__builtin_ia32_vpmadcsswd",    IX86_BUILTIN_VPMADCSSWD,  UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd,          "__builtin_ia32_vpmadcswd",     IX86_BUILTIN_VPMADCSWD,   UNKNOWN,      (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3,        "__builtin_ia32_vprotq",        IX86_BUILTIN_VPROTQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3,        "__builtin_ia32_vprotd",        IX86_BUILTIN_VPROTD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3,        "__builtin_ia32_vprotw",        IX86_BUILTIN_VPROTW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3,       "__builtin_ia32_vprotb",        IX86_BUILTIN_VPROTB,      UNKNOWN,      (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3,         "__builtin_ia32_vprotqi",       IX86_BUILTIN_VPROTQ_IMM,  UNKNOWN,      (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3,         "__builtin_ia32_vprotdi",       IX86_BUILTIN_VPROTD_IMM,  UNKNOWN,      (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3,         "__builtin_ia32_vprotwi",       IX86_BUILTIN_VPROTW_IMM,  UNKNOWN,      (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3,        "__builtin_ia32_vprotbi",       IX86_BUILTIN_VPROTB_IMM,  UNKNOWN,      (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3,         "__builtin_ia32_vpshaq",        IX86_BUILTIN_VPSHAQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3,         "__builtin_ia32_vpshad",        IX86_BUILTIN_VPSHAD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3,         "__builtin_ia32_vpshaw",        IX86_BUILTIN_VPSHAW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3,        "__builtin_ia32_vpshab",        IX86_BUILTIN_VPSHAB,      UNKNOWN,      (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3,         "__builtin_ia32_vpshlq",        IX86_BUILTIN_VPSHLQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3,         "__builtin_ia32_vpshld",        IX86_BUILTIN_VPSHLD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3,         "__builtin_ia32_vpshlw",        IX86_BUILTIN_VPSHLW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3,        "__builtin_ia32_vpshlb",        IX86_BUILTIN_VPSHLB,      UNKNOWN,      (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2,       "__builtin_ia32_vfrczss",       IX86_BUILTIN_VFRCZSS,     UNKNOWN,      (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2,       "__builtin_ia32_vfrczsd",       IX86_BUILTIN_VFRCZSD,     UNKNOWN,      (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2,         "__builtin_ia32_vfrczps",       IX86_BUILTIN_VFRCZPS,     UNKNOWN,      (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2,         "__builtin_ia32_vfrczpd",       IX86_BUILTIN_VFRCZPD,     UNKNOWN,      (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2,         "__builtin_ia32_vfrczps256",    IX86_BUILTIN_VFRCZPS256,  UNKNOWN,      (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2,         "__builtin_ia32_vfrczpd256",    IX86_BUILTIN_VFRCZPD256,  UNKNOWN,      (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw,           "__builtin_ia32_vphaddbw",      IX86_BUILTIN_VPHADDBW,    UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd,           "__builtin_ia32_vphaddbd",      IX86_BUILTIN_VPHADDBD,    UNKNOWN,      (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq,           "__builtin_ia32_vphaddbq",      IX86_BUILTIN_VPHADDBQ,    UNKNOWN,      (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd,           "__builtin_ia32_vphaddwd",      IX86_BUILTIN_VPHADDWD,    UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq,           "__builtin_ia32_vphaddwq",      IX86_BUILTIN_VPHADDWQ,    UNKNOWN,      (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq,           "__builtin_ia32_vphadddq",      IX86_BUILTIN_VPHADDDQ,    UNKNOWN,      (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw,          "__builtin_ia32_vphaddubw",     IX86_BUILTIN_VPHADDUBW,   UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd,          "__builtin_ia32_vphaddubd",     IX86_BUILTIN_VPHADDUBD,   UNKNOWN,      (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq,          "__builtin_ia32_vphaddubq",     IX86_BUILTIN_VPHADDUBQ,   UNKNOWN,      (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd,          "__builtin_ia32_vphadduwd",     IX86_BUILTIN_VPHADDUWD,   UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq,          "__builtin_ia32_vphadduwq",     IX86_BUILTIN_VPHADDUWQ,   UNKNOWN,      (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq,          "__builtin_ia32_vphaddudq",     IX86_BUILTIN_VPHADDUDQ,   UNKNOWN,      (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw,           "__builtin_ia32_vphsubbw",      IX86_BUILTIN_VPHSUBBW,    UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd,           "__builtin_ia32_vphsubwd",      IX86_BUILTIN_VPHSUBWD,    UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq,           "__builtin_ia32_vphsubdq",      IX86_BUILTIN_VPHSUBDQ,    UNKNOWN,      (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomeqb",      IX86_BUILTIN_VPCOMEQB,    EQ,           (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomneb",      IX86_BUILTIN_VPCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomneqb",     IX86_BUILTIN_VPCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomltb",      IX86_BUILTIN_VPCOMLTB,    LT,           (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomleb",      IX86_BUILTIN_VPCOMLEB,    LE,           (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomgtb",      IX86_BUILTIN_VPCOMGTB,    GT,           (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomgeb",      IX86_BUILTIN_VPCOMGEB,    GE,           (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomeqw",      IX86_BUILTIN_VPCOMEQW,    EQ,           (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomnew",      IX86_BUILTIN_VPCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomneqw",     IX86_BUILTIN_VPCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomltw",      IX86_BUILTIN_VPCOMLTW,    LT,           (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomlew",      IX86_BUILTIN_VPCOMLEW,    LE,           (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomgtw",      IX86_BUILTIN_VPCOMGTW,    GT,           (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomgew",      IX86_BUILTIN_VPCOMGEW,    GE,           (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomeqd",      IX86_BUILTIN_VPCOMEQD,    EQ,           (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomned",      IX86_BUILTIN_VPCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomneqd",     IX86_BUILTIN_VPCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomltd",      IX86_BUILTIN_VPCOMLTD,    LT,           (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomled",      IX86_BUILTIN_VPCOMLED,    LE,           (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomgtd",      IX86_BUILTIN_VPCOMGTD,    GT,           (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomged",      IX86_BUILTIN_VPCOMGED,    GE,           (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomeqq",      IX86_BUILTIN_VPCOMEQQ,    EQ,           (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomneq",      IX86_BUILTIN_VPCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomneqq",     IX86_BUILTIN_VPCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomltq",      IX86_BUILTIN_VPCOMLTQ,    LT,           (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomleq",      IX86_BUILTIN_VPCOMLEQ,    LE,           (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomgtq",      IX86_BUILTIN_VPCOMGTQ,    GT,           (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomgeq",      IX86_BUILTIN_VPCOMGEQ,    GE,           (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb",    IX86_BUILTIN_VPCOMEQUB,   EQ,           (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub",    IX86_BUILTIN_VPCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb",   IX86_BUILTIN_VPCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3,  "__builtin_ia32_vpcomltub",    IX86_BUILTIN_VPCOMLTUB,   LTU,          (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3,  "__builtin_ia32_vpcomleub",    IX86_BUILTIN_VPCOMLEUB,   LEU,          (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3,  "__builtin_ia32_vpcomgtub",    IX86_BUILTIN_VPCOMGTUB,   GTU,          (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3,  "__builtin_ia32_vpcomgeub",    IX86_BUILTIN_VPCOMGEUB,   GEU,          (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3,  "__builtin_ia32_vpcomequw",    IX86_BUILTIN_VPCOMEQUW,   EQ,           (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3,  "__builtin_ia32_vpcomneuw",    IX86_BUILTIN_VPCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3,  "__builtin_ia32_vpcomnequw",   IX86_BUILTIN_VPCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,   "__builtin_ia32_vpcomltuw",    IX86_BUILTIN_VPCOMLTUW,   LTU,          (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,   "__builtin_ia32_vpcomleuw",    IX86_BUILTIN_VPCOMLEUW,   LEU,          (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,   "__builtin_ia32_vpcomgtuw",    IX86_BUILTIN_VPCOMGTUW,   GTU,          (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,   "__builtin_ia32_vpcomgeuw",    IX86_BUILTIN_VPCOMGEUW,   GEU,          (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3,  "__builtin_ia32_vpcomequd",    IX86_BUILTIN_VPCOMEQUD,   EQ,           (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3,  "__builtin_ia32_vpcomneud",    IX86_BUILTIN_VPCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3,  "__builtin_ia32_vpcomnequd",   IX86_BUILTIN_VPCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,   "__builtin_ia32_vpcomltud",    IX86_BUILTIN_VPCOMLTUD,   LTU,          (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,   "__builtin_ia32_vpcomleud",    IX86_BUILTIN_VPCOMLEUD,   LEU,          (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,   "__builtin_ia32_vpcomgtud",    IX86_BUILTIN_VPCOMGTUD,   GTU,          (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,   "__builtin_ia32_vpcomgeud",    IX86_BUILTIN_VPCOMGEUD,   GEU,          (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3,  "__builtin_ia32_vpcomequq",    IX86_BUILTIN_VPCOMEQUQ,   EQ,           (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3,  "__builtin_ia32_vpcomneuq",    IX86_BUILTIN_VPCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3,  "__builtin_ia32_vpcomnequq",   IX86_BUILTIN_VPCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,   "__builtin_ia32_vpcomltuq",    IX86_BUILTIN_VPCOMLTUQ,   LTU,          (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,   "__builtin_ia32_vpcomleuq",    IX86_BUILTIN_VPCOMLEUQ,   LEU,          (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,   "__builtin_ia32_vpcomgtuq",    IX86_BUILTIN_VPCOMGTUQ,   GTU,          (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,   "__builtin_ia32_vpcomgeuq",    IX86_BUILTIN_VPCOMGEUQ,   GEU,          (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,      "__builtin_ia32_vpcomfalseb",  IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,       "__builtin_ia32_vpcomfalsew",  IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,       "__builtin_ia32_vpcomfalsed",  IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,       "__builtin_ia32_vpcomfalseq",  IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,      "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,       "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25281 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25282 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25286 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25287 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25288 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25289 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25290 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25291 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25294 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25295 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25296 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
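
/* Illustrative sketch (not part of GCC): user code normally reaches the
   vpcom* builtins above through the XOP intrinsic header rather than by
   calling them directly.  Assuming x86intrin.h and -mxop, something like:

     #include <x86intrin.h>

     __m128i
     less_than (__m128i a, __m128i b)
     {
       return _mm_comlt_epi32 (a, b);	// wraps __builtin_ia32_vpcomltd
     }

   The duplicate "ne"/"neq" spellings in the table are deliberate aliases
   mapping to the same IX86_BUILTIN_* code.  */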

/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, to allow the user to compile particular
   modules with different target specific options that differ from the
   command line options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3 */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* RDRND */
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);
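
  /* Rough usage sketch (assumption, not lifted from GCC's headers):
     mmintrin.h builds MMX vectors through these builtins, e.g.

       __m64
       make_pair (int lo, int hi)
       {
	 return (__m64) __builtin_ia32_vec_init_v2si (lo, hi);
       }

     which is essentially what _mm_set_pi32 expands to.  */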

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
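
  /* Rough usage sketch (assumption): the intrinsic headers wrap these,
     e.g. _mm_extract_pi16 in xmmintrin.h is roughly

       (int) __builtin_ia32_vec_ext_v4hi ((__v4hi) __A, __N);

     with __N required to be an integer constant selecting the element.  */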

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
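
  /* Rough usage sketch (assumption): _mm_insert_pi16 and friends map
     onto the vec_set builtins, e.g.

       __m64
       put_third (__m64 v, short x)
       {
	 return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi) v, x, 2);
       }
  */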

  /* Add FMA4 multi-argument instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}

/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
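
/* Illustrative sketch (not part of GCC): with these registered, 64-bit
   user code can switch calling conventions and still use varargs, e.g.

     int __attribute__ ((ms_abi))
     sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
	 s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/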

static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
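
/* Illustrative sketch (not part of GCC): once registered, the names are
   usable as ordinary types in user code compiled for x86, e.g.

     __float80  e = 2.718281828459045235L;
     __float128 q = 1.0;	/- 128-bit IEEE quad, software-emulated -/

   __float80 has XFmode (the representation long double usually has on
   x86); __float128 has TFmode.  */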

static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to a normal call if SSE2 isn't available, since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
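
  /* Rough usage sketch (assumption): these are reachable directly from
     user code, e.g.

       __float128
       magnitude (__float128 x)
       {
	 if (x == __builtin_infq ())
	   return x;
	 return __builtin_fabsq (x);
       }

     Without SSE2 the calls fall back to the libgcc routines named above
     (__fabstf2, __copysigntf3).  */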

  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}

/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);
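
  /* Some SSE2 shift patterns take a TImode operand where the builtin
     itself takes an int; widen the 32-bit count by loading it into the
     low element of a V4SI register and using its TImode lowpart.  */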
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  unsigned int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
	    {
	      enum insn_code new_icode = icode;
	      switch (icode)
		{
		case CODE_FOR_xop_vpermil2v2df3:
		case CODE_FOR_xop_vpermil2v4sf3:
		case CODE_FOR_xop_vpermil2v4df3:
		case CODE_FOR_xop_vpermil2v8sf3:
		  error ("the last argument must be a 2-bit immediate");
		  return gen_reg_rtx (tmode);
		case CODE_FOR_xop_rotlv2di3:
		  new_icode = CODE_FOR_rotlv2di3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv4si3:
		  new_icode = CODE_FOR_rotlv4si3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv8hi3:
		  new_icode = CODE_FOR_rotlv8hi3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv16qi3:
		  new_icode = CODE_FOR_rotlv16qi3;
		xop_rotl:
		  if (CONST_INT_P (op))
		    {
		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
		      op = GEN_INT (INTVAL (op) & mask);
		      gcc_checking_assert
			(insn_data[icode].operand[i + 1].predicate (op, mode));
		    }
		  else
		    {
		      gcc_checking_assert
			(nargs == 2
			 && insn_data[new_icode].operand[0].mode == tmode
			 && insn_data[new_icode].operand[1].mode == tmode
			 && insn_data[new_icode].operand[2].mode == mode
			 && insn_data[new_icode].operand[0].predicate
			    == insn_data[icode].operand[0].predicate
			 && insn_data[new_icode].operand[1].predicate
			    == insn_data[icode].operand[1].predicate);
		      icode = new_icode;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op,
			     args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
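
/* Rough usage sketch (assumption): the 2-bit-immediate requirement above
   is what the XOP permute intrinsics expose, e.g.

     #include <x86intrin.h>

     __m128d
     pick (__m128d a, __m128d b, __m128i sel)
     {
       return _mm_permute2_pd (a, b, sel, 0);	// last arg: 2-bit constant
     }
*/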

/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
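
/* Rough usage sketch (assumption): the comi expansions surface as the
   scalar compare intrinsics, e.g.

     #include <xmmintrin.h>

     int
     first_is_less (__m128 a, __m128 b)
     {
       return _mm_comilt_ss (a, b);	// comiss + setcc on the low element
     }
*/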

/* Subroutine of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  gen_rtx_REG (CCmode, FLAGS_REG),
					  const0_rtx)));

  return SUBREG_REG (target);
}
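
/* Rough usage sketch (assumption): this is the expansion behind the
   SSE4.1 test intrinsics, e.g.

     #include <smmintrin.h>

     int
     all_zero (__m128i mask, __m128i v)
     {
       return _mm_testz_si128 (mask, v);   // ptest + setcc from FLAGS
     }
*/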

/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
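
/* Rough usage sketch (assumption): user code reaches these through the
   SSE4.2 string intrinsics, e.g.

     #include <smmintrin.h>

     int
     find_byte (__m128i hay, __m128i needles)
     {
       return _mm_cmpistri (needles, hay, _SIDD_CMP_EQUAL_ANY);
     }

   The flavors returning a flag bit (d->flag nonzero above) back the
   _mm_cmpistrz/_mm_cmpistrc family.  */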

/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
      if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
      nargs = 3;
      break;
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in register.  */
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
      else if ((nargs - i) <= nargs_constant)
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:
	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vmcmpv2df3:
	      case CODE_FOR_avx_vmcmpv4sf3:
	      case CODE_FOR_avx_cmpv2df3:
	      case CODE_FOR_avx_cmpv4sf3:
	      case CODE_FOR_avx_cmpv4df3:
	      case CODE_FOR_avx_cmpv8sf3:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((nargs - i) == nargs_constant)
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
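
/* Rough usage sketch (assumption): the immediate-range checks above are
   what reject out-of-range constants in intrinsics like _mm_round_pd, e.g.

     #include <smmintrin.h>

     __m128d
     nearest (__m128d x)
     {
       return _mm_round_pd (x, _MM_FROUND_TO_NEAREST_INT);  // 4-bit immediate
     }

   Passing a non-constant or out-of-range value produces the "must be a
   4-bit immediate" diagnostic instead of silently truncating.  */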

/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
				  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      if (icode == CODE_FOR_avx_vzeroupper)
	target = GEN_INT (vzeroupper_intrinsic);
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	{
	  if (GET_MODE (op) != Pmode)
	    op = convert_to_mode (Pmode, op, 1);
	  target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
	}
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      if (GET_MODE (op) != Pmode)
		op = convert_to_mode (Pmode, op, 1);
	      op = gen_rtx_MEM (mode, force_reg (Pmode, op));
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be a register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}

/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
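
/* Illustrative sketch (not part of GCC): for non-MMX vectors the
   language-level syntax mentioned above is a vector initializer, e.g.

     typedef int v4si __attribute__ ((vector_size (16)));
     v4si x = (v4si) { 1, 2, 3, 4 };

   For MMX modes only the mmintrin.h builtins are provided, so that the
   compiler never emits MMX register code behind the user's back.  */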

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
				       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	  free (opts);
	}
      return const0_rtx;
    }
27098 case IX86_BUILTIN_MASKMOVQ:
27099 case IX86_BUILTIN_MASKMOVDQU:
27100 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27101 ? CODE_FOR_mmx_maskmovq
27102 : CODE_FOR_sse2_maskmovdqu);
27103 /* Note the arg order is different from the operand order. */
27104 arg1 = CALL_EXPR_ARG (exp, 0);
27105 arg2 = CALL_EXPR_ARG (exp, 1);
27106 arg0 = CALL_EXPR_ARG (exp, 2);
27107 op0 = expand_normal (arg0);
27108 op1 = expand_normal (arg1);
27109 op2 = expand_normal (arg2);
27110 mode0 = insn_data[icode].operand[0].mode;
27111 mode1 = insn_data[icode].operand[1].mode;
27112 mode2 = insn_data[icode].operand[2].mode;
27114 if (GET_MODE (op0) != Pmode)
27115 op0 = convert_to_mode (Pmode, op0, 1);
27116 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
27118 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27119 op0 = copy_to_mode_reg (mode0, op0);
27120 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27121 op1 = copy_to_mode_reg (mode1, op1);
27122 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27123 op2 = copy_to_mode_reg (mode2, op2);
27124 pat = GEN_FCN (icode) (op0, op1, op2);
27130 case IX86_BUILTIN_LDMXCSR:
27131 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27132 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27133 emit_move_insn (target, op0);
27134 emit_insn (gen_sse_ldmxcsr (target));
27137 case IX86_BUILTIN_STMXCSR:
27138 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27139 emit_insn (gen_sse_stmxcsr (target));
27140 return copy_to_mode_reg (SImode, target);
27142 case IX86_BUILTIN_CLFLUSH:
27143 arg0 = CALL_EXPR_ARG (exp, 0);
27144 op0 = expand_normal (arg0);
27145 icode = CODE_FOR_sse2_clflush;
27146 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27148 if (GET_MODE (op0) != Pmode)
27149 op0 = convert_to_mode (Pmode, op0, 1);
27150 op0 = force_reg (Pmode, op0);
27153 emit_insn (gen_sse2_clflush (op0));
27156 case IX86_BUILTIN_MONITOR:
27157 arg0 = CALL_EXPR_ARG (exp, 0);
27158 arg1 = CALL_EXPR_ARG (exp, 1);
27159 arg2 = CALL_EXPR_ARG (exp, 2);
27160 op0 = expand_normal (arg0);
27161 op1 = expand_normal (arg1);
27162 op2 = expand_normal (arg2);
27165 if (GET_MODE (op0) != Pmode)
27166 op0 = convert_to_mode (Pmode, op0, 1);
27167 op0 = force_reg (Pmode, op0);
27170 op1 = copy_to_mode_reg (SImode, op1);
27172 op2 = copy_to_mode_reg (SImode, op2);
27173 emit_insn (ix86_gen_monitor (op0, op1, op2));
27176 case IX86_BUILTIN_MWAIT:
27177 arg0 = CALL_EXPR_ARG (exp, 0);
27178 arg1 = CALL_EXPR_ARG (exp, 1);
27179 op0 = expand_normal (arg0);
27180 op1 = expand_normal (arg1);
27182 op0 = copy_to_mode_reg (SImode, op0);
27184 op1 = copy_to_mode_reg (SImode, op1);
27185 emit_insn (gen_sse3_mwait (op0, op1));
27188 case IX86_BUILTIN_VEC_INIT_V2SI:
27189 case IX86_BUILTIN_VEC_INIT_V4HI:
27190 case IX86_BUILTIN_VEC_INIT_V8QI:
27191 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27193 case IX86_BUILTIN_VEC_EXT_V2DF:
27194 case IX86_BUILTIN_VEC_EXT_V2DI:
27195 case IX86_BUILTIN_VEC_EXT_V4SF:
27196 case IX86_BUILTIN_VEC_EXT_V4SI:
27197 case IX86_BUILTIN_VEC_EXT_V8HI:
27198 case IX86_BUILTIN_VEC_EXT_V2SI:
27199 case IX86_BUILTIN_VEC_EXT_V4HI:
27200 case IX86_BUILTIN_VEC_EXT_V16QI:
27201 return ix86_expand_vec_ext_builtin (exp, target);
27203 case IX86_BUILTIN_VEC_SET_V2DI:
27204 case IX86_BUILTIN_VEC_SET_V4SF:
27205 case IX86_BUILTIN_VEC_SET_V4SI:
27206 case IX86_BUILTIN_VEC_SET_V8HI:
27207 case IX86_BUILTIN_VEC_SET_V4HI:
27208 case IX86_BUILTIN_VEC_SET_V16QI:
27209 return ix86_expand_vec_set_builtin (exp);
27211 case IX86_BUILTIN_VEC_PERM_V2DF:
27212 case IX86_BUILTIN_VEC_PERM_V4SF:
27213 case IX86_BUILTIN_VEC_PERM_V2DI:
27214 case IX86_BUILTIN_VEC_PERM_V4SI:
27215 case IX86_BUILTIN_VEC_PERM_V8HI:
27216 case IX86_BUILTIN_VEC_PERM_V16QI:
27217 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27218 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27219 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27220 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27221 case IX86_BUILTIN_VEC_PERM_V4DF:
27222 case IX86_BUILTIN_VEC_PERM_V8SF:
27223 return ix86_expand_vec_perm_builtin (exp);
27225 case IX86_BUILTIN_INFQ:
27226 case IX86_BUILTIN_HUGE_VALQ:
27228 REAL_VALUE_TYPE inf;
27232 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27234 tmp = validize_mem (force_const_mem (mode, tmp));
27237 target = gen_reg_rtx (mode);
27239 emit_move_insn (target, tmp);
27243 case IX86_BUILTIN_LLWPCB:
27244 arg0 = CALL_EXPR_ARG (exp, 0);
27245 op0 = expand_normal (arg0);
27246 icode = CODE_FOR_lwp_llwpcb;
27247 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27249 if (GET_MODE (op0) != Pmode)
27250 op0 = convert_to_mode (Pmode, op0, 1);
27251 op0 = force_reg (Pmode, op0);
27253 emit_insn (gen_lwp_llwpcb (op0));
27256 case IX86_BUILTIN_SLWPCB:
27257 icode = CODE_FOR_lwp_slwpcb;
27259 || !insn_data[icode].operand[0].predicate (target, Pmode))
27260 target = gen_reg_rtx (Pmode);
27261 emit_insn (gen_lwp_slwpcb (target));
27264 case IX86_BUILTIN_BEXTRI32:
27265 case IX86_BUILTIN_BEXTRI64:
27266 arg0 = CALL_EXPR_ARG (exp, 0);
27267 arg1 = CALL_EXPR_ARG (exp, 1);
27268 op0 = expand_normal (arg0);
27269 op1 = expand_normal (arg1);
27270 icode = (fcode == IX86_BUILTIN_BEXTRI32
27271 ? CODE_FOR_tbm_bextri_si
27272 : CODE_FOR_tbm_bextri_di);
27273 if (!CONST_INT_P (op1))
27275 error ("last argument must be an immediate");
27280 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27281 unsigned char lsb_index = INTVAL (op1) & 0xFF;
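/* A sketch of the TBM BEXTRI immediate encoding: bits 7:0 hold the lsb
   index and bits 15:8 the field length, so e.g.
   __builtin_ia32_bextri_u32 (x, 0x0804) computes (x >> 4) & 0xff.  */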
27282 op1 = GEN_INT (length);
27283 op2 = GEN_INT (lsb_index);
27284 pat = GEN_FCN (icode) (target, op0, op1, op2);
27290 case IX86_BUILTIN_RDRAND16_STEP:
27291 icode = CODE_FOR_rdrandhi_1;
27295 case IX86_BUILTIN_RDRAND32_STEP:
27296 icode = CODE_FOR_rdrandsi_1;
27300 case IX86_BUILTIN_RDRAND64_STEP:
27301 icode = CODE_FOR_rdranddi_1;
27305 op0 = gen_reg_rtx (mode0);
27306 emit_insn (GEN_FCN (icode) (op0));
27308 arg0 = CALL_EXPR_ARG (exp, 0);
27309 op1 = expand_normal (arg0);
27310 if (!address_operand (op1, VOIDmode))
27312 op1 = convert_memory_address (Pmode, op1);
27313 op1 = copy_addr_to_reg (op1);
27315 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
27317 op1 = gen_reg_rtx (SImode);
27318 emit_move_insn (op1, CONST1_RTX (SImode));
27320 /* Emit SImode conditional move. */
27321 if (mode0 == HImode)
27323 op2 = gen_reg_rtx (SImode);
27324 emit_insn (gen_zero_extendhisi2 (op2, op0));
27326 else if (mode0 == SImode)
27329 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27332 target = gen_reg_rtx (SImode);
27334 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27336 emit_insn (gen_rtx_SET (VOIDmode, target,
27337 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
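/* The GEU test on CCCmode above reads the carry flag: rdrand sets CF on
   success and clears it on failure (where the destination is documented
   to be zero), so the conditional move yields 1 on success and 0 on
   failure, matching the *_step intrinsic interface.  */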
27344 for (i = 0, d = bdesc_special_args;
27345 i < ARRAY_SIZE (bdesc_special_args);
27347 if (d->code == fcode)
27348 return ix86_expand_special_args_builtin (d, exp, target);
27350 for (i = 0, d = bdesc_args;
27351 i < ARRAY_SIZE (bdesc_args);
27353 if (d->code == fcode)
27356 case IX86_BUILTIN_FABSQ:
27357 case IX86_BUILTIN_COPYSIGNQ:
27359 /* Emit a normal call if SSE2 isn't available. */
27360 return expand_call (exp, target, ignore);
27362 return ix86_expand_args_builtin (d, exp, target);
27365 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27366 if (d->code == fcode)
27367 return ix86_expand_sse_comi (d, exp, target);
27369 for (i = 0, d = bdesc_pcmpestr;
27370 i < ARRAY_SIZE (bdesc_pcmpestr);
27372 if (d->code == fcode)
27373 return ix86_expand_sse_pcmpestr (d, exp, target);
27375 for (i = 0, d = bdesc_pcmpistr;
27376 i < ARRAY_SIZE (bdesc_pcmpistr);
27378 if (d->code == fcode)
27379 return ix86_expand_sse_pcmpistr (d, exp, target);
27381 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27382 if (d->code == fcode)
27383 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27384 (enum ix86_builtin_func_type)
27385 d->flag, d->comparison);
27387 gcc_unreachable ();
27390 /* Returns a function decl for a vectorized version of the builtin function
27391 FNDECL, vectorizing from input vector type TYPE_IN to output vector type
27392 TYPE_OUT, or NULL_TREE if it is not available. */
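/* For example, a vectorizer query for BUILT_IN_SQRT with V2DF input and
   output types resolves to IX86_BUILTIN_SQRTPD in the switch below.  */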
27395 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27398 enum machine_mode in_mode, out_mode;
27400 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27402 if (TREE_CODE (type_out) != VECTOR_TYPE
27403 || TREE_CODE (type_in) != VECTOR_TYPE
27404 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27407 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27408 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27409 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27410 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27414 case BUILT_IN_SQRT:
27415 if (out_mode == DFmode && in_mode == DFmode)
27417 if (out_n == 2 && in_n == 2)
27418 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27419 else if (out_n == 4 && in_n == 4)
27420 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27424 case BUILT_IN_SQRTF:
27425 if (out_mode == SFmode && in_mode == SFmode)
27427 if (out_n == 4 && in_n == 4)
27428 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27429 else if (out_n == 8 && in_n == 8)
27430 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27434 case BUILT_IN_LRINT:
27435 if (out_mode == SImode && out_n == 4
27436 && in_mode == DFmode && in_n == 2)
27437 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27440 case BUILT_IN_LRINTF:
27441 if (out_mode == SImode && in_mode == SFmode)
27443 if (out_n == 4 && in_n == 4)
27444 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27445 else if (out_n == 8 && in_n == 8)
27446 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27450 case BUILT_IN_COPYSIGN:
27451 if (out_mode == DFmode && in_mode == DFmode)
27453 if (out_n == 2 && in_n == 2)
27454 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27455 else if (out_n == 4 && in_n == 4)
27456 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27460 case BUILT_IN_COPYSIGNF:
27461 if (out_mode == SFmode && in_mode == SFmode)
27463 if (out_n == 4 && in_n == 4)
27464 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27465 else if (out_n == 8 && in_n == 8)
27466 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27470 case BUILT_IN_FLOOR:
27471 /* The round insn does not trap on denormals. */
27472 if (flag_trapping_math || !TARGET_ROUND)
27475 if (out_mode == DFmode && in_mode == DFmode)
27477 if (out_n == 2 && in_n == 2)
27478 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27479 else if (out_n == 4 && in_n == 4)
27480 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27484 case BUILT_IN_FLOORF:
27485 /* The round insn does not trap on denormals. */
27486 if (flag_trapping_math || !TARGET_ROUND)
27489 if (out_mode == SFmode && in_mode == SFmode)
27491 if (out_n == 4 && in_n == 4)
27492 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27493 else if (out_n == 8 && in_n == 8)
27494 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27498 case BUILT_IN_CEIL:
27499 /* The round insn does not trap on denormals. */
27500 if (flag_trapping_math || !TARGET_ROUND)
27503 if (out_mode == DFmode && in_mode == DFmode)
27505 if (out_n == 2 && in_n == 2)
27506 return ix86_builtins[IX86_BUILTIN_CEILPD];
27507 else if (out_n == 4 && in_n == 4)
27508 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27512 case BUILT_IN_CEILF:
27513 /* The round insn does not trap on denormals. */
27514 if (flag_trapping_math || !TARGET_ROUND)
27517 if (out_mode == SFmode && in_mode == SFmode)
27519 if (out_n == 4 && in_n == 4)
27520 return ix86_builtins[IX86_BUILTIN_CEILPS];
27521 else if (out_n == 8 && in_n == 8)
27522 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27526 case BUILT_IN_TRUNC:
27527 /* The round insn does not trap on denormals. */
27528 if (flag_trapping_math || !TARGET_ROUND)
27531 if (out_mode == DFmode && in_mode == DFmode)
27533 if (out_n == 2 && in_n == 2)
27534 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27535 else if (out_n == 4 && in_n == 4)
27536 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27540 case BUILT_IN_TRUNCF:
27541 /* The round insn does not trap on denormals. */
27542 if (flag_trapping_math || !TARGET_ROUND)
27545 if (out_mode == SFmode && in_mode == SFmode)
27547 if (out_n == 4 && in_n == 4)
27548 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27549 else if (out_n == 8 && in_n == 8)
27550 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27554 case BUILT_IN_RINT:
27555 /* The round insn does not trap on denormals. */
27556 if (flag_trapping_math || !TARGET_ROUND)
27559 if (out_mode == DFmode && in_mode == DFmode)
27561 if (out_n == 2 && in_n == 2)
27562 return ix86_builtins[IX86_BUILTIN_RINTPD];
27563 else if (out_n == 4 && in_n == 4)
27564 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27568 case BUILT_IN_RINTF:
27569 /* The round insn does not trap on denormals. */
27570 if (flag_trapping_math || !TARGET_ROUND)
27573 if (out_mode == SFmode && in_mode == SFmode)
27575 if (out_n == 4 && in_n == 4)
27576 return ix86_builtins[IX86_BUILTIN_RINTPS];
27577 else if (out_n == 8 && in_n == 8)
27578 return ix86_builtins[IX86_BUILTIN_RINTPS256];
27583 if (out_mode == DFmode && in_mode == DFmode)
27585 if (out_n == 2 && in_n == 2)
27586 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27587 if (out_n == 4 && in_n == 4)
27588 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27592 case BUILT_IN_FMAF:
27593 if (out_mode == SFmode && in_mode == SFmode)
27595 if (out_n == 4 && in_n == 4)
27596 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27597 if (out_n == 8 && in_n == 8)
27598 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27606 /* Dispatch to a handler for a vectorization library. */
27607 if (ix86_veclib_handler)
27608 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27614 /* Handler for an SVML-style interface to
27615 a library with vectorized intrinsics. */
27618 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27621 tree fntype, new_fndecl, args;
27624 enum machine_mode el_mode, in_mode;
27627 /* SVML is suitable only for unsafe math. */
27628 if (!flag_unsafe_math_optimizations)
27631 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27632 n = TYPE_VECTOR_SUBPARTS (type_out);
27633 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27634 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27635 if (el_mode != in_mode
27643 case BUILT_IN_LOG10:
27645 case BUILT_IN_TANH:
27647 case BUILT_IN_ATAN:
27648 case BUILT_IN_ATAN2:
27649 case BUILT_IN_ATANH:
27650 case BUILT_IN_CBRT:
27651 case BUILT_IN_SINH:
27653 case BUILT_IN_ASINH:
27654 case BUILT_IN_ASIN:
27655 case BUILT_IN_COSH:
27657 case BUILT_IN_ACOSH:
27658 case BUILT_IN_ACOS:
27659 if (el_mode != DFmode || n != 2)
27663 case BUILT_IN_EXPF:
27664 case BUILT_IN_LOGF:
27665 case BUILT_IN_LOG10F:
27666 case BUILT_IN_POWF:
27667 case BUILT_IN_TANHF:
27668 case BUILT_IN_TANF:
27669 case BUILT_IN_ATANF:
27670 case BUILT_IN_ATAN2F:
27671 case BUILT_IN_ATANHF:
27672 case BUILT_IN_CBRTF:
27673 case BUILT_IN_SINHF:
27674 case BUILT_IN_SINF:
27675 case BUILT_IN_ASINHF:
27676 case BUILT_IN_ASINF:
27677 case BUILT_IN_COSHF:
27678 case BUILT_IN_COSF:
27679 case BUILT_IN_ACOSHF:
27680 case BUILT_IN_ACOSF:
27681 if (el_mode != SFmode || n != 4)
27689 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27691 if (fn == BUILT_IN_LOGF)
27692 strcpy (name, "vmlsLn4");
27693 else if (fn == BUILT_IN_LOG)
27694 strcpy (name, "vmldLn2");
27697 sprintf (name, "vmls%s", bname+10);
27698 name[strlen (name)-1] = '4';
27701 sprintf (name, "vmld%s2", bname+10);
27703 /* Convert to uppercase. */
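/* The resulting names follow the SVML convention; e.g., assuming the
   standard builtin spellings, BUILT_IN_SINF becomes "vmlsSin4" and
   BUILT_IN_SIN becomes "vmldSin2".  */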
27707 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27708 args = TREE_CHAIN (args))
27712 fntype = build_function_type_list (type_out, type_in, NULL);
27714 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27716 /* Build a function declaration for the vectorized function. */
27717 new_fndecl = build_decl (BUILTINS_LOCATION,
27718 FUNCTION_DECL, get_identifier (name), fntype);
27719 TREE_PUBLIC (new_fndecl) = 1;
27720 DECL_EXTERNAL (new_fndecl) = 1;
27721 DECL_IS_NOVOPS (new_fndecl) = 1;
27722 TREE_READONLY (new_fndecl) = 1;
27727 /* Handler for an ACML-style interface to
27728 a library with vectorized intrinsics. */
27731 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27733 char name[20] = "__vr.._";
27734 tree fntype, new_fndecl, args;
27737 enum machine_mode el_mode, in_mode;
27740 /* ACML is 64-bit only and suitable only for unsafe math, as it does
27741 not correctly support parts of IEEE arithmetic with the required
27742 precision, such as denormals. */
27744 || !flag_unsafe_math_optimizations)
27747 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27748 n = TYPE_VECTOR_SUBPARTS (type_out);
27749 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27750 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27751 if (el_mode != in_mode
27761 case BUILT_IN_LOG2:
27762 case BUILT_IN_LOG10:
27765 if (el_mode != DFmode
27770 case BUILT_IN_SINF:
27771 case BUILT_IN_COSF:
27772 case BUILT_IN_EXPF:
27773 case BUILT_IN_POWF:
27774 case BUILT_IN_LOGF:
27775 case BUILT_IN_LOG2F:
27776 case BUILT_IN_LOG10F:
27779 if (el_mode != SFmode
27788 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27789 sprintf (name + 7, "%s", bname+10);
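/* e.g., illustratively, BUILT_IN_SIN on V2DF yields "__vrd2_sin" and
   BUILT_IN_SINF on V4SF yields "__vrs4_sinf", matching the ACML vector
   routine names.  */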
27792 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27793 args = TREE_CHAIN (args))
27797 fntype = build_function_type_list (type_out, type_in, NULL);
27799 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27801 /* Build a function declaration for the vectorized function. */
27802 new_fndecl = build_decl (BUILTINS_LOCATION,
27803 FUNCTION_DECL, get_identifier (name), fntype);
27804 TREE_PUBLIC (new_fndecl) = 1;
27805 DECL_EXTERNAL (new_fndecl) = 1;
27806 DECL_IS_NOVOPS (new_fndecl) = 1;
27807 TREE_READONLY (new_fndecl) = 1;
27813 /* Returns a decl of a function that implements conversion of an integer vector
27814 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27815 are the types involved when converting according to CODE.
27816 Return NULL_TREE if it is not available. */
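/* For instance, a FLOAT_EXPR converting V4SI to V4SF resolves to the
   CVTDQ2PS builtin below (CVTUDQ2PS for unsigned sources).  */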
27819 ix86_vectorize_builtin_conversion (unsigned int code,
27820 tree dest_type, tree src_type)
27828 switch (TYPE_MODE (src_type))
27831 switch (TYPE_MODE (dest_type))
27834 return (TYPE_UNSIGNED (src_type)
27835 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
27836 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
27838 return (TYPE_UNSIGNED (src_type)
27840 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
27846 switch (TYPE_MODE (dest_type))
27849 return (TYPE_UNSIGNED (src_type)
27851 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
27860 case FIX_TRUNC_EXPR:
27861 switch (TYPE_MODE (dest_type))
27864 switch (TYPE_MODE (src_type))
27867 return (TYPE_UNSIGNED (dest_type)
27869 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
27871 return (TYPE_UNSIGNED (dest_type)
27873 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
27880 switch (TYPE_MODE (src_type))
27883 return (TYPE_UNSIGNED (dest_type)
27885 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
27902 /* Returns the decl of a target-specific builtin that implements the
27903 reciprocal of the function FN, or NULL_TREE if not available. */
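/* E.g., with -ffast-math, a vectorized 1.0f / sqrtf (x) can be computed
   via RSQRTPS plus a Newton-Raphson refinement step (the *_NR builtins
   below) instead of a full square root and divide.  */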
27906 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
27907 bool sqrt ATTRIBUTE_UNUSED)
27909 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
27910 && flag_finite_math_only && !flag_trapping_math
27911 && flag_unsafe_math_optimizations))
27915 /* Machine dependent builtins. */
27918 /* Vectorized version of sqrt to rsqrt conversion. */
27919 case IX86_BUILTIN_SQRTPS_NR:
27920 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
27922 case IX86_BUILTIN_SQRTPS_NR256:
27923 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
27929 /* Normal builtins. */
27932 /* Sqrt to rsqrt conversion. */
27933 case BUILT_IN_SQRTF:
27934 return ix86_builtins[IX86_BUILTIN_RSQRTF];
27941 /* Helper for avx_vpermilps256_operand et al. This is also used by
27942 the expansion functions to turn the parallel back into a mask.
27943 The return value is 0 for no match and the imm8+1 for a match. */
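/* Worked example for V4SFmode: the parallel [1 0 3 2] uses two bits per
   element, giving imm8 = 1 | (0 << 2) | (3 << 4) | (2 << 6) = 0xb1, so
   the function returns 0xb2.  */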
27946 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27948 unsigned i, nelt = GET_MODE_NUNITS (mode);
27950 unsigned char ipar[8];
27952 if (XVECLEN (par, 0) != (int) nelt)
27955 /* Validate that all of the elements are constants, and not totally
27956 out of range. Copy the data into an integral array to make the
27957 subsequent checks easier. */
27958 for (i = 0; i < nelt; ++i)
27960 rtx er = XVECEXP (par, 0, i);
27961 unsigned HOST_WIDE_INT ei;
27963 if (!CONST_INT_P (er))
27974 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
27976 for (i = 0; i < 2; ++i)
27980 mask |= ipar[i] << i;
27982 for (i = 2; i < 4; ++i)
27986 mask |= (ipar[i] - 2) << i;
27991 /* In the 256-bit SFmode case, we have full freedom of movement
27992 within the low 128-bit lane, but the high 128-bit lane must
27993 mirror the exact same pattern. */
27994 for (i = 0; i < 4; ++i)
27995 if (ipar[i] + 4 != ipar[i + 4])
28002 /* In the 128-bit case, we've full freedom in the placement of
28003 the elements from the source operand. */
28004 for (i = 0; i < nelt; ++i)
28005 mask |= ipar[i] << (i * (nelt / 2));
28009 gcc_unreachable ();
28012 /* Make sure success has a non-zero value by adding one. */
28016 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
28017 the expansion functions to turn the parallel back into a mask.
28018 The return value is 0 for no match and the imm8+1 for a match. */
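/* Worked example for V4DFmode: the parallel [0 1 4 5] selects the low
   half of each source operand; each half's leading index is divided by
   NELT2 and placed in its own nibble, so imm8 = 0 | (2 << 4) = 0x20 and
   the function returns 0x21.  */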
28021 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
28023 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
28025 unsigned char ipar[8];
28027 if (XVECLEN (par, 0) != (int) nelt)
28030 /* Validate that all of the elements are constants, and not totally
28031 out of range. Copy the data into an integral array to make the
28032 subsequent checks easier. */
28033 for (i = 0; i < nelt; ++i)
28035 rtx er = XVECEXP (par, 0, i);
28036 unsigned HOST_WIDE_INT ei;
28038 if (!CONST_INT_P (er))
28041 if (ei >= 2 * nelt)
28046 /* Validate that each half of the permute selects consecutive elements, i.e. a whole half of one source operand. */
28047 for (i = 0; i < nelt2 - 1; ++i)
28048 if (ipar[i] + 1 != ipar[i + 1])
28050 for (i = nelt2; i < nelt - 1; ++i)
28051 if (ipar[i] + 1 != ipar[i + 1])
28054 /* Reconstruct the mask. */
28055 for (i = 0; i < 2; ++i)
28057 unsigned e = ipar[i * nelt2];
28061 mask |= e << (i * 4);
28064 /* Make sure success has a non-zero value by adding one. */
28069 /* Store OPERAND to memory after reload is completed. This means
28070 that we can't easily use assign_stack_local. */
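/* On 64-bit targets with a red zone we can store below the stack pointer
   without adjusting it, since the 128 bytes below %rsp are safe from
   asynchronous clobber under the SysV ABI; otherwise the value is pushed
   and the slot is later released by ix86_free_from_memory.  */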
28072 ix86_force_to_memory (enum machine_mode mode, rtx operand)
28076 gcc_assert (reload_completed);
28077 if (ix86_using_red_zone ())
28079 result = gen_rtx_MEM (mode,
28080 gen_rtx_PLUS (Pmode,
28082 GEN_INT (-RED_ZONE_SIZE)));
28083 emit_move_insn (result, operand);
28085 else if (TARGET_64BIT)
28091 operand = gen_lowpart (DImode, operand);
28095 gen_rtx_SET (VOIDmode,
28096 gen_rtx_MEM (DImode,
28097 gen_rtx_PRE_DEC (DImode,
28098 stack_pointer_rtx)),
28102 gcc_unreachable ();
28104 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28113 split_double_mode (mode, &operand, 1, operands, operands + 1);
28115 gen_rtx_SET (VOIDmode,
28116 gen_rtx_MEM (SImode,
28117 gen_rtx_PRE_DEC (Pmode,
28118 stack_pointer_rtx)),
28121 gen_rtx_SET (VOIDmode,
28122 gen_rtx_MEM (SImode,
28123 gen_rtx_PRE_DEC (Pmode,
28124 stack_pointer_rtx)),
28129 /* Store HImode values as SImode. */
28130 operand = gen_lowpart (SImode, operand);
28134 gen_rtx_SET (VOIDmode,
28135 gen_rtx_MEM (GET_MODE (operand),
28136 gen_rtx_PRE_DEC (SImode,
28137 stack_pointer_rtx)),
28141 gcc_unreachable ();
28143 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28148 /* Free the operand from memory. */
28150 ix86_free_from_memory (enum machine_mode mode)
28152 if (!ix86_using_red_zone ())
28156 if (mode == DImode || TARGET_64BIT)
28160 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28161 to a pop or add instruction if registers are available. */
28162 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28163 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28168 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28170 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28171 QImode must go into class Q_REGS.
28172 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28173 movdf to do mem-to-mem moves through integer regs. */
28176 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28178 enum machine_mode mode = GET_MODE (x);
28180 /* We're only allowed to return a subclass of CLASS. Many of the
28181 following checks fail for NO_REGS, so eliminate that early. */
28182 if (regclass == NO_REGS)
28185 /* All classes can load zeros. */
28186 if (x == CONST0_RTX (mode))
28189 /* Force constants into memory if we are loading a (nonzero) constant into
28190 an MMX or SSE register. This is because there are no MMX/SSE instructions
28191 to load from a constant. */
28193 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28196 /* Prefer SSE regs only if we can use them for math. */
28197 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28198 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28200 /* Floating-point constants need more complex checks. */
28201 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28203 /* General regs can load everything. */
28204 if (reg_class_subset_p (regclass, GENERAL_REGS))
28207 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28208 zero above. We only want to wind up preferring 80387 registers if
28209 we plan on doing computation with them. */
28211 && standard_80387_constant_p (x) > 0)
28213 /* Limit class to non-sse. */
28214 if (regclass == FLOAT_SSE_REGS)
28216 if (regclass == FP_TOP_SSE_REGS)
28218 if (regclass == FP_SECOND_SSE_REGS)
28219 return FP_SECOND_REG;
28220 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28227 /* Generally when we see PLUS here, it's the function invariant
28228 (plus soft-fp const_int), which can only be computed into general regs. */
28230 if (GET_CODE (x) == PLUS)
28231 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28233 /* QImode constants are easy to load, but non-constant QImode data
28234 must go into Q_REGS. */
28235 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28237 if (reg_class_subset_p (regclass, Q_REGS))
28239 if (reg_class_subset_p (Q_REGS, regclass))
28247 /* Discourage putting floating-point values in SSE registers unless
28248 SSE math is being used, and likewise for the 387 registers. */
28250 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28252 enum machine_mode mode = GET_MODE (x);
28254 /* Restrict the output reload class to the register bank that we are doing
28255 math on. If we would like not to return a subset of CLASS, reject this
28256 alternative: if reload cannot do this, it will still use its choice. */
28258 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28259 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28261 if (X87_FLOAT_MODE_P (mode))
28263 if (regclass == FP_TOP_SSE_REGS)
28265 else if (regclass == FP_SECOND_SSE_REGS)
28266 return FP_SECOND_REG;
28268 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28275 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28276 enum machine_mode mode, secondary_reload_info *sri)
28278 /* Double-word spills from general registers to non-offsettable memory
28279 references (zero-extended addresses) require special handling. */
28282 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
28283 && rclass == GENERAL_REGS
28284 && !offsettable_memref_p (x))
28287 ? CODE_FOR_reload_noff_load
28288 : CODE_FOR_reload_noff_store);
28289 /* Add the cost of moving the address to a temporary. */
28290 sri->extra_cost = 1;
28295 /* QImode spills from non-QI registers require an
28296 intermediate register on 32-bit targets. */
28298 && !in_p && mode == QImode
28299 && (rclass == GENERAL_REGS
28300 || rclass == LEGACY_REGS
28301 || rclass == INDEX_REGS))
28310 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28311 regno = true_regnum (x);
28313 /* Return Q_REGS if the operand is in memory. */
28318 /* This condition handles the corner case where an expression involving
28319 pointers gets vectorized. We're trying to use the address of a
28320 stack slot as a vector initializer.
28322 (set (reg:V2DI 74 [ vect_cst_.2 ])
28323 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28325 Eventually frame gets turned into sp+offset like this:
28327 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28328 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28329 (const_int 392 [0x188]))))
28331 That later gets turned into:
28333 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28334 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28335 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28337 We'll have the following reload recorded:
28339 Reload 0: reload_in (DI) =
28340 (plus:DI (reg/f:DI 7 sp)
28341 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28342 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28343 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28344 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28345 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28346 reload_reg_rtx: (reg:V2DI 22 xmm1)
28348 This isn't going to work, since SSE instructions can't handle scalar
28349 additions. Returning GENERAL_REGS forces the addition into an integer
28350 register, and reload can handle subsequent reloads without problems. */
28352 if (in_p && GET_CODE (x) == PLUS
28353 && SSE_CLASS_P (rclass)
28354 && SCALAR_INT_MODE_P (mode))
28355 return GENERAL_REGS;
28360 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28363 ix86_class_likely_spilled_p (reg_class_t rclass)
28374 case SSE_FIRST_REG:
28376 case FP_SECOND_REG:
28386 /* If we are copying between general and FP registers, we need a memory
28387 location. The same is true for SSE and MMX registers.
28389 To optimize register_move_cost performance, allow the inline variant.
28391 The macro can't work reliably when one of the CLASSES is a class containing
28392 registers from multiple units (SSE, MMX, integer). We avoid this by never
28393 combining those units in a single alternative in the machine description.
28394 Ensure that this constraint holds to avoid unexpected surprises.
28396 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28397 enforce these sanity checks. */
28400 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28401 enum machine_mode mode, int strict)
28403 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28404 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28405 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28406 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28407 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28408 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28410 gcc_assert (!strict);
28414 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28417 /* ??? This is a lie. We do have moves between mmx and general regs, and
28418 between mmx and sse2 regs. But by saying we need secondary memory we
28419 discourage the register allocator from using the mmx registers unless needed. */
28420 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28423 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28425 /* SSE1 doesn't have any direct moves from other classes. */
28429 /* If the target says that inter-unit moves are more expensive
28430 than moving through memory, then don't generate them. */
28431 if (!TARGET_INTER_UNIT_MOVES)
28434 /* Between SSE and general, we have moves no larger than word size. */
28435 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28443 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28444 enum machine_mode mode, int strict)
28446 return inline_secondary_memory_needed (class1, class2, mode, strict);
28449 /* Implement the TARGET_CLASS_MAX_NREGS hook.
28451 On the 80386, this is the size of MODE in words,
28452 except in the FP regs, where a single reg is always enough. */
28454 static unsigned char
28455 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
28457 if (MAYBE_INTEGER_CLASS_P (rclass))
28459 if (mode == XFmode)
28460 return (TARGET_64BIT ? 2 : 3);
28461 else if (mode == XCmode)
28462 return (TARGET_64BIT ? 4 : 6);
28464 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
28468 if (COMPLEX_MODE_P (mode))
28475 /* Return true if the registers in CLASS cannot represent the change from
28476 modes FROM to TO. */
28479 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28480 enum reg_class regclass)
28485 /* x87 registers can't do subreg at all, as all values are reformatted
28486 to extended precision. */
28487 if (MAYBE_FLOAT_CLASS_P (regclass))
28490 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28492 /* Vector registers do not support QI or HImode loads. If we don't
28493 disallow a change to these modes, reload will assume it's ok to
28494 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28495 the vec_dupv4hi pattern. */
28496 if (GET_MODE_SIZE (from) < 4)
28499 /* Vector registers do not support subreg with nonzero offsets, which
28500 are otherwise valid for integer registers. Since we can't see
28501 whether we have a nonzero offset from here, prohibit all
28502 nonparadoxical subregs changing size. */
28503 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28510 /* Return the cost of moving data of mode M between a
28511 register and memory. A value of 2 is the default; this cost is
28512 relative to those in `REGISTER_MOVE_COST'.
28514 This function is used extensively by register_move_cost, which is used to
28515 build tables at startup, so make it inline in that case.
28516 When IN is 2, return the maximum of the in and out move costs.
28518 If moving between registers and memory is more expensive than
28519 between two registers, you should define this macro to express the relative cost.
28522 Also model the increased cost of moving QImode registers in non-Q_REGS classes. */
28526 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28530 if (FLOAT_CLASS_P (regclass))
28548 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28549 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28551 if (SSE_CLASS_P (regclass))
28554 switch (GET_MODE_SIZE (mode))
28569 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28570 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28572 if (MMX_CLASS_P (regclass))
28575 switch (GET_MODE_SIZE (mode))
28587 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28588 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28590 switch (GET_MODE_SIZE (mode))
28593 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28596 return ix86_cost->int_store[0];
28597 if (TARGET_PARTIAL_REG_DEPENDENCY
28598 && optimize_function_for_speed_p (cfun))
28599 cost = ix86_cost->movzbl_load;
28601 cost = ix86_cost->int_load[0];
28603 return MAX (cost, ix86_cost->int_store[0]);
28609 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28611 return ix86_cost->movzbl_load;
28613 return ix86_cost->int_store[0] + 4;
28618 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28619 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28621 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
28622 if (mode == TFmode)
28625 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28627 cost = ix86_cost->int_load[2];
28629 cost = ix86_cost->int_store[2];
28630 return (cost * (((int) GET_MODE_SIZE (mode)
28631 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28636 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28639 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28643 /* Return the cost of moving data from a register in class CLASS1 to
28644 one in class CLASS2.
28646 It is not required that the cost always equal 2 when FROM is the same as TO;
28647 on some machines it is expensive to move between registers if they are not
28648 general registers. */
28651 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28652 reg_class_t class2_i)
28654 enum reg_class class1 = (enum reg_class) class1_i;
28655 enum reg_class class2 = (enum reg_class) class2_i;
28657 /* In case we require secondary memory, compute the cost of the store
28658 followed by the load. To avoid bad register allocation choices, we need
28659 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28661 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28665 cost += inline_memory_move_cost (mode, class1, 2);
28666 cost += inline_memory_move_cost (mode, class2, 2);
28668 /* When copying from a general-purpose register we may emit multiple
28669 stores followed by a single load, causing a memory size mismatch stall.
28670 Count this as an arbitrarily high cost of 20. */
28671 if (targetm.class_max_nregs (class1, mode)
28672 > targetm.class_max_nregs (class2, mode))
28675 /* In the case of FP/MMX moves, the registers actually overlap, and we
28676 have to switch modes in order to treat them differently. */
28677 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28678 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28684 /* Moves between SSE/MMX and integer unit are expensive. */
28685 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28686 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28688 /* ??? By keeping the returned value relatively high, we limit the number
28689 of moves between integer and MMX/SSE registers for all targets.
28690 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
28691 where integer modes in MMX/SSE registers are not tieable
28692 because of missing QImode and HImode moves to, from, or between
28693 MMX/SSE registers. */
28694 return MAX (8, ix86_cost->mmxsse_to_integer);
28696 if (MAYBE_FLOAT_CLASS_P (class1))
28697 return ix86_cost->fp_move;
28698 if (MAYBE_SSE_CLASS_P (class1))
28699 return ix86_cost->sse_move;
28700 if (MAYBE_MMX_CLASS_P (class1))
28701 return ix86_cost->mmx_move;
28705 /* Return TRUE if hard register REGNO can hold a value of machine-mode
28709 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28711 /* Flags, and only flags, can hold CCmode values. */
28712 if (CC_REGNO_P (regno))
28713 return GET_MODE_CLASS (mode) == MODE_CC;
28714 if (GET_MODE_CLASS (mode) == MODE_CC
28715 || GET_MODE_CLASS (mode) == MODE_RANDOM
28716 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28718 if (FP_REGNO_P (regno))
28719 return VALID_FP_MODE_P (mode);
28720 if (SSE_REGNO_P (regno))
28722 /* We implement the move patterns for all vector modes into and
28723 out of SSE registers, even when no operation instructions
27724 are available. OImode move is available only when AVX is enabled. */
28726 return ((TARGET_AVX && mode == OImode)
28727 || VALID_AVX256_REG_MODE (mode)
28728 || VALID_SSE_REG_MODE (mode)
28729 || VALID_SSE2_REG_MODE (mode)
28730 || VALID_MMX_REG_MODE (mode)
28731 || VALID_MMX_REG_MODE_3DNOW (mode));
28733 if (MMX_REGNO_P (regno))
28735 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28736 so if the register is available at all, then we can move data of
28737 the given mode into or out of it. */
28738 return (VALID_MMX_REG_MODE (mode)
28739 || VALID_MMX_REG_MODE_3DNOW (mode));
28742 if (mode == QImode)
28744 /* Take care with QImode values - they can be in non-QI regs,
28745 but then they do cause partial register stalls. */
28746 if (regno <= BX_REG || TARGET_64BIT)
28748 if (!TARGET_PARTIAL_REG_STALL)
28750 return !can_create_pseudo_p ();
28752 /* We handle both integers and floats in the general purpose registers. */
28753 else if (VALID_INT_MODE_P (mode))
28755 else if (VALID_FP_MODE_P (mode))
28757 else if (VALID_DFP_MODE_P (mode))
28759 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28760 on to use that value in smaller contexts, this can easily force a
28761 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28762 supporting DImode, allow it. */
28763 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28769 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28770 tieable integer mode. */
28773 ix86_tieable_integer_mode_p (enum machine_mode mode)
28782 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28785 return TARGET_64BIT;
28792 /* Return true if MODE1 is accessible in a register that can hold MODE2
28793 without copying. That is, all register classes that can hold MODE2
28794 can also hold MODE1. */
28797 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28799 if (mode1 == mode2)
28802 if (ix86_tieable_integer_mode_p (mode1)
28803 && ix86_tieable_integer_mode_p (mode2))
28806 /* MODE2 being XFmode implies fp stack or general regs, which means we
28807 can tie any smaller floating point modes to it. Note that we do not
28808 tie this with TFmode. */
28809 if (mode2 == XFmode)
28810 return mode1 == SFmode || mode1 == DFmode;
28812 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28813 that we can tie it with SFmode. */
28814 if (mode2 == DFmode)
28815 return mode1 == SFmode;
28817 /* If MODE2 is only appropriate for an SSE register, then tie with
28818 any other mode acceptable to SSE registers. */
28819 if (GET_MODE_SIZE (mode2) == 16
28820 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28821 return (GET_MODE_SIZE (mode1) == 16
28822 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28824 /* If MODE2 is appropriate for an MMX register, then tie
28825 with any other mode acceptable to MMX registers. */
28826 if (GET_MODE_SIZE (mode2) == 8
28827 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28828 return (GET_MODE_SIZE (mode1) == 8
28829 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
28834 /* Compute a (partial) cost for rtx X. Return true if the complete
28835 cost has been computed, and false if subexpressions should be
28836 scanned. In either case, *TOTAL contains the cost result. */
28839 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
28842 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
28843 enum machine_mode mode = GET_MODE (x);
28844 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
28852 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
28854 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
28856 else if (flag_pic && SYMBOLIC_CONST (x)
28858 || (GET_CODE (x) != LABEL_REF
28859 && (GET_CODE (x) != SYMBOL_REF
28860 || !SYMBOL_REF_LOCAL_P (x)))))
28867 if (mode == VOIDmode)
28870 switch (standard_80387_constant_p (x))
28875 default: /* Other constants */
28880 /* Start with (MEM (SYMBOL_REF)), since that's where
28881 it'll probably end up. Add a penalty for size. */
28882 *total = (COSTS_N_INSNS (1)
28883 + (flag_pic != 0 && !TARGET_64BIT)
28884 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
28890 /* The zero extension is often completely free on x86_64, so make
28891 it as cheap as possible. */
28892 if (TARGET_64BIT && mode == DImode
28893 && GET_MODE (XEXP (x, 0)) == SImode)
28895 else if (TARGET_ZERO_EXTEND_WITH_AND)
28896 *total = cost->add;
28898 *total = cost->movzx;
28902 *total = cost->movsx;
28906 if (CONST_INT_P (XEXP (x, 1))
28907 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
28909 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28912 *total = cost->add;
28915 if ((value == 2 || value == 3)
28916 && cost->lea <= cost->shift_const)
28918 *total = cost->lea;
28928 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
28930 if (CONST_INT_P (XEXP (x, 1)))
28932 if (INTVAL (XEXP (x, 1)) > 32)
28933 *total = cost->shift_const + COSTS_N_INSNS (2);
28935 *total = cost->shift_const * 2;
28939 if (GET_CODE (XEXP (x, 1)) == AND)
28940 *total = cost->shift_var * 2;
28942 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
28947 if (CONST_INT_P (XEXP (x, 1)))
28948 *total = cost->shift_const;
28950 *total = cost->shift_var;
28958 gcc_assert (FLOAT_MODE_P (mode));
28959 gcc_assert (TARGET_FMA || TARGET_FMA4);
28961 /* ??? SSE scalar/vector cost should be used here. */
28962 /* ??? Bald assumption that fma has the same cost as fmul. */
28963 *total = cost->fmul;
28964 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
28966 /* A negate in op0 or op2 is free: FMS, FNMA, FNMS. */
28968 if (GET_CODE (sub) == NEG)
28969 sub = XEXP (sub, 0);
28970 *total += rtx_cost (sub, FMA, 0, speed);
28973 if (GET_CODE (sub) == NEG)
28974 sub = XEXP (sub, 0);
28975 *total += rtx_cost (sub, FMA, 2, speed);
28980 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28982 /* ??? SSE scalar cost should be used here. */
28983 *total = cost->fmul;
28986 else if (X87_FLOAT_MODE_P (mode))
28988 *total = cost->fmul;
28991 else if (FLOAT_MODE_P (mode))
28993 /* ??? SSE vector cost should be used here. */
28994 *total = cost->fmul;
28999 rtx op0 = XEXP (x, 0);
29000 rtx op1 = XEXP (x, 1);
29002 if (CONST_INT_P (XEXP (x, 1)))
29004 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29005 for (nbits = 0; value != 0; value &= value - 1)
29009 /* This is arbitrary. */
29012 /* Compute costs correctly for widening multiplication. */
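/* e.g. (mult:DI (sign_extend:DI (reg:SI a)) (sign_extend:DI (reg:SI b)))
   is a single widening SImode multiply, so it is costed in SImode rather
   than DImode.  */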
29013 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
29014 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
29015 == GET_MODE_SIZE (mode))
29017 int is_mulwiden = 0;
29018 enum machine_mode inner_mode = GET_MODE (op0);
29020 if (GET_CODE (op0) == GET_CODE (op1))
29021 is_mulwiden = 1, op1 = XEXP (op1, 0);
29022 else if (CONST_INT_P (op1))
29024 if (GET_CODE (op0) == SIGN_EXTEND)
29025 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
29028 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
29032 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
29035 *total = (cost->mult_init[MODE_INDEX (mode)]
29036 + nbits * cost->mult_bit
29037 + rtx_cost (op0, outer_code, opno, speed)
29038 + rtx_cost (op1, outer_code, opno, speed));
29047 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29048 /* ??? SSE cost should be used here. */
29049 *total = cost->fdiv;
29050 else if (X87_FLOAT_MODE_P (mode))
29051 *total = cost->fdiv;
29052 else if (FLOAT_MODE_P (mode))
29053 /* ??? SSE vector cost should be used here. */
29054 *total = cost->fdiv;
29056 *total = cost->divide[MODE_INDEX (mode)];
29060 if (GET_MODE_CLASS (mode) == MODE_INT
29061 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
29063 if (GET_CODE (XEXP (x, 0)) == PLUS
29064 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
29065 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
29066 && CONSTANT_P (XEXP (x, 1)))
29068 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
29069 if (val == 2 || val == 4 || val == 8)
29071 *total = cost->lea;
29072 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
29073 outer_code, opno, speed);
29074 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
29075 outer_code, opno, speed);
29076 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
29080 else if (GET_CODE (XEXP (x, 0)) == MULT
29081 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
29083 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
29084 if (val == 2 || val == 4 || val == 8)
29086 *total = cost->lea;
29087 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
29088 outer_code, opno, speed);
29089 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
29093 else if (GET_CODE (XEXP (x, 0)) == PLUS)
29095 *total = cost->lea;
29096 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
29097 outer_code, opno, speed);
29098 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
29099 outer_code, opno, speed);
29100 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
29107 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29109 /* ??? SSE cost should be used here. */
29110 *total = cost->fadd;
29113 else if (X87_FLOAT_MODE_P (mode))
29115 *total = cost->fadd;
29118 else if (FLOAT_MODE_P (mode))
29120 /* ??? SSE vector cost should be used here. */
29121 *total = cost->fadd;
29129 if (!TARGET_64BIT && mode == DImode)
29131 *total = (cost->add * 2
29132 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
29133 << (GET_MODE (XEXP (x, 0)) != DImode))
29134 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
29135 << (GET_MODE (XEXP (x, 1)) != DImode)));
29141 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29143 /* ??? SSE cost should be used here. */
29144 *total = cost->fchs;
29147 else if (X87_FLOAT_MODE_P (mode))
29149 *total = cost->fchs;
29152 else if (FLOAT_MODE_P (mode))
29154 /* ??? SSE vector cost should be used here. */
29155 *total = cost->fchs;
29161 if (!TARGET_64BIT && mode == DImode)
29162 *total = cost->add * 2;
29164 *total = cost->add;
29168 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29169 && XEXP (XEXP (x, 0), 1) == const1_rtx
29170 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29171 && XEXP (x, 1) == const0_rtx)
29173 /* This kind of construct is implemented using test[bwl].
29174 Treat it as if we had an AND. */
29175 *total = (cost->add
29176 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
29177 + rtx_cost (const1_rtx, outer_code, opno, speed));
29183 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29188 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29189 /* ??? SSE cost should be used here. */
29190 *total = cost->fabs;
29191 else if (X87_FLOAT_MODE_P (mode))
29192 *total = cost->fabs;
29193 else if (FLOAT_MODE_P (mode))
29194 /* ??? SSE vector cost should be used here. */
29195 *total = cost->fabs;
29199 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29200 /* ??? SSE cost should be used here. */
29201 *total = cost->fsqrt;
29202 else if (X87_FLOAT_MODE_P (mode))
29203 *total = cost->fsqrt;
29204 else if (FLOAT_MODE_P (mode))
29205 /* ??? SSE vector cost should be used here. */
29206 *total = cost->fsqrt;
29210 if (XINT (x, 1) == UNSPEC_TP)
29217 case VEC_DUPLICATE:
29218 /* ??? Assume all of these vector manipulation patterns are
29219 recognizable, in which case they all pretty much have the same cost. */
29221 *total = COSTS_N_INSNS (1);
29231 static int current_machopic_label_num;
29233 /* Given a symbol name and its associated stub, write out the
29234 definition of the stub. */
29237 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29239 unsigned int length;
29240 char *binder_name, *symbol_name, lazy_ptr_name[32];
29241 int label = ++current_machopic_label_num;
29243 /* For 64-bit we shouldn't get here. */
29244 gcc_assert (!TARGET_64BIT);
29246 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29247 symb = targetm.strip_name_encoding (symb);
29249 length = strlen (stub);
29250 binder_name = XALLOCAVEC (char, length + 32);
29251 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29253 length = strlen (symb);
29254 symbol_name = XALLOCAVEC (char, length + 32);
29255 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29257 sprintf (lazy_ptr_name, "L%d$lz", label);
29259 if (MACHOPIC_ATT_STUB)
29260 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29261 else if (MACHOPIC_PURE)
29262 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29264 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29266 fprintf (file, "%s:\n", stub);
29267 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29269 if (MACHOPIC_ATT_STUB)
29271 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29273 else if (MACHOPIC_PURE)
29276 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29277 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29278 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29279 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
29280 label, lazy_ptr_name, label);
29281 fprintf (file, "\tjmp\t*%%ecx\n");
29284 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29286 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29287 it needs no stub-binding-helper. */
29288 if (MACHOPIC_ATT_STUB)
29291 fprintf (file, "%s:\n", binder_name);
29295 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29296 fprintf (file, "\tpushl\t%%ecx\n");
29299 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29301 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29303 /* N.B. Keep the correspondence of these
29304 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29305 old-pic/new-pic/non-pic stubs; altering this will break
29306 compatibility with existing dylibs. */
29309 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29310 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29313 /* 16-byte -mdynamic-no-pic stub. */
29314 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
29316 fprintf (file, "%s:\n", lazy_ptr_name);
29317 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29318 fprintf (file, ASM_LONG "%s\n", binder_name);
29320 #endif /* TARGET_MACHO */
29322 /* Order the registers for register allocator. */
29325 x86_order_regs_for_local_alloc (void)
29330 /* First allocate the local general purpose registers. */
29331 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29332 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29333 reg_alloc_order [pos++] = i;
29335 /* Global general purpose registers. */
29336 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29337 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29338 reg_alloc_order [pos++] = i;
29340 /* x87 registers come first in case we are doing FP math using them. */
29342 if (!TARGET_SSE_MATH)
29343 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29344 reg_alloc_order [pos++] = i;
29346 /* SSE registers. */
29347 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29348 reg_alloc_order [pos++] = i;
29349 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29350 reg_alloc_order [pos++] = i;
29352 /* x87 registers. */
29353 if (TARGET_SSE_MATH)
29354 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29355 reg_alloc_order [pos++] = i;
29357 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29358 reg_alloc_order [pos++] = i;
29360 /* Initialize the rest of the array, as we do not allocate some registers at all. */
29362 while (pos < FIRST_PSEUDO_REGISTER)
29363 reg_alloc_order [pos++] = 0;
29366 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29367 in struct attribute_spec.handler. */
29369 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29371 int flags ATTRIBUTE_UNUSED,
29372 bool *no_add_attrs)
29374 if (TREE_CODE (*node) != FUNCTION_TYPE
29375 && TREE_CODE (*node) != METHOD_TYPE
29376 && TREE_CODE (*node) != FIELD_DECL
29377 && TREE_CODE (*node) != TYPE_DECL)
29379 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29381 *no_add_attrs = true;
29386 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29388 *no_add_attrs = true;
29391 if (is_attribute_p ("callee_pop_aggregate_return", name))
29395 cst = TREE_VALUE (args);
29396 if (TREE_CODE (cst) != INTEGER_CST)
29398 warning (OPT_Wattributes,
29399 "%qE attribute requires an integer constant argument",
29401 *no_add_attrs = true;
29403 else if (compare_tree_int (cst, 0) != 0
29404 && compare_tree_int (cst, 1) != 0)
29406 warning (OPT_Wattributes,
29407 "argument to %qE attribute is neither zero, nor one",
29409 *no_add_attrs = true;
29418 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
29419 struct attribute_spec.handler. */
29421 ix86_handle_abi_attribute (tree *node, tree name,
29422 tree args ATTRIBUTE_UNUSED,
29423 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29425 if (TREE_CODE (*node) != FUNCTION_TYPE
29426 && TREE_CODE (*node) != METHOD_TYPE
29427 && TREE_CODE (*node) != FIELD_DECL
29428 && TREE_CODE (*node) != TYPE_DECL)
29430 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29432 *no_add_attrs = true;
29436 /* Can combine regparm with all attributes but fastcall. */
29437 if (is_attribute_p ("ms_abi", name))
29439 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29441 error ("ms_abi and sysv_abi attributes are not compatible");
29446 else if (is_attribute_p ("sysv_abi", name))
29448 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29450 error ("ms_abi and sysv_abi attributes are not compatible");
29459 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29460 struct attribute_spec.handler. */
29462 ix86_handle_struct_attribute (tree *node, tree name,
29463 tree args ATTRIBUTE_UNUSED,
29464 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29467 if (DECL_P (*node))
29469 if (TREE_CODE (*node) == TYPE_DECL)
29470 type = &TREE_TYPE (*node);
29475 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29476 || TREE_CODE (*type) == UNION_TYPE)))
29478 warning (OPT_Wattributes, "%qE attribute ignored",
29480 *no_add_attrs = true;
29483 else if ((is_attribute_p ("ms_struct", name)
29484 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29485 || ((is_attribute_p ("gcc_struct", name)
29486 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29488 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29490 *no_add_attrs = true;
29497 ix86_handle_fndecl_attribute (tree *node, tree name,
29498 tree args ATTRIBUTE_UNUSED,
29499 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29501 if (TREE_CODE (*node) != FUNCTION_DECL)
29503 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29505 *no_add_attrs = true;
29511 ix86_ms_bitfield_layout_p (const_tree record_type)
29513 return ((TARGET_MS_BITFIELD_LAYOUT
29514 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29515 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29518 /* Returns an expression indicating where the this parameter is
29519 located on entry to the FUNCTION. */
29522 x86_this_parameter (tree function)
29524 tree type = TREE_TYPE (function);
29525 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29530 const int *parm_regs;
29532 if (ix86_function_type_abi (type) == MS_ABI)
29533 parm_regs = x86_64_ms_abi_int_parameter_registers;
29535 parm_regs = x86_64_int_parameter_registers;
29536 return gen_rtx_REG (DImode, parm_regs[aggr]);
29539 nregs = ix86_function_regparm (type, function);
29541 if (nregs > 0 && !stdarg_p (type))
29544 unsigned int ccvt = ix86_get_callcvt (type);
29546 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29547 regno = aggr ? DX_REG : CX_REG;
29548 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29552 return gen_rtx_MEM (SImode,
29553 plus_constant (stack_pointer_rtx, 4));
29562 return gen_rtx_MEM (SImode,
29563 plus_constant (stack_pointer_rtx, 4));
29566 return gen_rtx_REG (SImode, regno);
29569 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29572 /* Determine whether x86_output_mi_thunk can succeed. */
29575 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29576 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29577 HOST_WIDE_INT vcall_offset, const_tree function)
29579 /* 64-bit can handle anything. */
29583 /* For 32-bit, everything's fine if we have one free register. */
29584 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29587 /* Need a free register for vcall_offset. */
29591 /* Need a free register for GOT references. */
29592 if (flag_pic && !targetm.binds_local_p (function))
29595 /* Otherwise ok. */
29599 /* Output the assembler code for a thunk function. THUNK_DECL is the
29600 declaration for the thunk function itself, FUNCTION is the decl for
29601 the target function. DELTA is an immediate constant offset to be
29602 added to THIS. If VCALL_OFFSET is nonzero, the word at
29603 *(*this + vcall_offset) should be added to THIS. */
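/* Editor's sketch (not part of GCC): in C terms the emitted thunk
   behaves roughly like

     this = (char *) this + DELTA;
     if (VCALL_OFFSET)
       this = (char *) this
              + *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     goto FUNCTION;           with all argument registers untouched

   where *(char **) this is the vtable pointer of the object.  */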
29606 x86_output_mi_thunk (FILE *file,
29607 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29608 HOST_WIDE_INT vcall_offset, tree function)
29610 rtx this_param = x86_this_parameter (function);
29611 rtx this_reg, tmp, fnaddr;
29613 emit_note (NOTE_INSN_PROLOGUE_END);
29615 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29616 pull it in now and let DELTA benefit. */
29617 if (REG_P (this_param))
29618 this_reg = this_param;
29619 else if (vcall_offset)
29621 /* Put the this parameter into %eax. */
29622 this_reg = gen_rtx_REG (Pmode, AX_REG);
29623 emit_move_insn (this_reg, this_param);
29626 this_reg = NULL_RTX;
29628 /* Adjust the this parameter by a fixed constant. */
29631 rtx delta_rtx = GEN_INT (delta);
29632 rtx delta_dst = this_reg ? this_reg : this_param;
29636 if (!x86_64_general_operand (delta_rtx, Pmode))
29638 tmp = gen_rtx_REG (Pmode, R10_REG);
29639 emit_move_insn (tmp, delta_rtx);
29644 emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx));
29647 /* Adjust the this parameter by a value stored in the vtable. */
29650 rtx vcall_addr, vcall_mem, this_mem;
29651 unsigned int tmp_regno;
29654 tmp_regno = R10_REG;
29657 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
29658 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
29659 tmp_regno = AX_REG;
29661 tmp_regno = CX_REG;
29663 tmp = gen_rtx_REG (Pmode, tmp_regno);
29665 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
29666 if (Pmode != ptr_mode)
29667 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
29668 emit_move_insn (tmp, this_mem);
29670 /* Adjust the this parameter. */
29671 vcall_addr = plus_constant (tmp, vcall_offset);
29673 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
29675 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
29676 emit_move_insn (tmp2, GEN_INT (vcall_offset));
29677 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
29680 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
29681 if (Pmode != ptr_mode)
29682 emit_insn (gen_addsi_1_zext (this_reg,
29683 gen_rtx_REG (ptr_mode,
29687 emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem));
29690 /* If necessary, drop THIS back to its stack slot. */
29691 if (this_reg && this_reg != this_param)
29692 emit_move_insn (this_param, this_reg);
29694 fnaddr = XEXP (DECL_RTL (function), 0);
29697 if (!flag_pic || targetm.binds_local_p (function)
29698 || cfun->machine->call_abi == MS_ABI)
29702 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
29703 tmp = gen_rtx_CONST (Pmode, tmp);
29704 fnaddr = gen_rtx_MEM (Pmode, tmp);
29709 if (!flag_pic || targetm.binds_local_p (function))
29712 else if (TARGET_MACHO)
29714 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
29715 fnaddr = XEXP (fnaddr, 0);
29717 #endif /* TARGET_MACHO */
29720 tmp = gen_rtx_REG (Pmode, CX_REG);
29721 output_set_got (tmp, NULL_RTX);
29723 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
29724 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
29725 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
29729 /* Our sibling call patterns do not allow memories, because we have no
29730 predicate that can distinguish between frame and non-frame memory.
29731 For our purposes here, we can get away with (ab)using a jump pattern,
29732 because we're going to do no optimization. */
29733 if (MEM_P (fnaddr))
29734 emit_jump_insn (gen_indirect_jump (fnaddr));
29737 tmp = gen_rtx_MEM (QImode, fnaddr);
29738 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
29739 tmp = emit_call_insn (tmp);
29740 SIBLING_CALL_P (tmp) = 1;
29744 /* Emit just enough of rest_of_compilation to get the insns emitted.
29745 Note that use_thunk calls assemble_start_function et al. */
29746 tmp = get_insns ();
29747 insn_locators_alloc ();
29748 shorten_branches (tmp);
29749 final_start_function (tmp, file, 1);
29750 final (tmp, file, 1);
29751 final_end_function ();
29755 x86_file_start (void)
29757 default_file_start ();
29759 darwin_file_start ();
29761 if (X86_FILE_START_VERSION_DIRECTIVE)
29762 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29763 if (X86_FILE_START_FLTUSED)
29764 fputs ("\t.global\t__fltused\n", asm_out_file);
29765 if (ix86_asm_dialect == ASM_INTEL)
29766 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29770 x86_field_alignment (tree field, int computed)
29772 enum machine_mode mode;
29773 tree type = TREE_TYPE (field);
29775 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29777 mode = TYPE_MODE (strip_array_types (type));
29778 if (mode == DFmode || mode == DCmode
29779 || GET_MODE_CLASS (mode) == MODE_INT
29780 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29781 return MIN (32, computed);
29785 /* Output assembler code to FILE to increment profiler label # LABELNO
29786 for profiling a function entry. */
29788 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29790 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29795 #ifndef NO_PROFILE_COUNTERS
29796 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29799 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29800 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29802 fprintf (file, "\tcall\t%s\n", mcount_name);
29806 #ifndef NO_PROFILE_COUNTERS
29807 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29810 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29814 #ifndef NO_PROFILE_COUNTERS
29815 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29818 fprintf (file, "\tcall\t%s\n", mcount_name);
29822 /* We don't have exact information about the insn sizes, but we may assume
29823 quite safely that we are informed about all 1 byte insns and memory
29824 address sizes. This is enough to eliminate unnecessary padding in 99% of cases. */
29828 min_insn_size (rtx insn)
29832 if (!INSN_P (insn) || !active_insn_p (insn))
29835 /* Discard alignments we've emitted and jump instructions. */
29836 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
29837 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
29839 if (JUMP_TABLE_DATA_P (insn))
29842 /* Important case - calls are always 5 bytes.
29843 It is common to have many calls in a row. */
29845 && symbolic_reference_mentioned_p (PATTERN (insn))
29846 && !SIBLING_CALL_P (insn))
29848 len = get_attr_length (insn);
29852 /* For normal instructions we rely on get_attr_length being exact,
29853 with a few exceptions. */
29854 if (!JUMP_P (insn))
29856 enum attr_type type = get_attr_type (insn);
29861 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
29862 || asm_noperands (PATTERN (insn)) >= 0)
29869 /* Otherwise trust get_attr_length. */
29873 l = get_attr_length_address (insn);
29874 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
29883 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29885 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte window. */
29889 ix86_avoid_jump_mispredicts (void)
29891 rtx insn, start = get_insns ();
29892 int nbytes = 0, njumps = 0;
29895 /* Look for all minimal intervals of instructions containing 4 jumps.
29896 The intervals are bounded by START and INSN. NBYTES is the total
29897 size of instructions in the interval including INSN and not including
29898 START. When NBYTES is smaller than 16, it is possible
29899 that the end of START and the end of INSN land in the same 16-byte page.
29901 The smallest offset in the page INSN can start is the case where START
29902 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
29903 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
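/* Worked example (illustrative): suppose an interval holds four jumps
   and NBYTES is 12, with the final jump 2 bytes long.  If START ends
   at offset 0 of a 16-byte page, the last jump would end at offset 12,
   so all four jumps could share one page.  Emitting p2align with max
   skip 15 - 12 + 2 = 5 before the last jump forces it into the next
   page whenever that overlap could occur.  */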
29905 for (insn = start; insn; insn = NEXT_INSN (insn))
29909 if (LABEL_P (insn))
29911 int align = label_to_alignment (insn);
29912 int max_skip = label_to_max_skip (insn);
29916 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29917 already in the current 16 byte page, because otherwise
29918 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29919 bytes to reach 16 byte boundary. */
29921 || (align <= 3 && max_skip != (1 << align) - 1))
29924 fprintf (dump_file, "Label %i with max_skip %i\n",
29925 INSN_UID (insn), max_skip);
29928 while (nbytes + max_skip >= 16)
29930 start = NEXT_INSN (start);
29931 if ((JUMP_P (start)
29932 && GET_CODE (PATTERN (start)) != ADDR_VEC
29933 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29935 njumps--, isjump = 1;
29938 nbytes -= min_insn_size (start);
29944 min_size = min_insn_size (insn);
29945 nbytes += min_size;
29947 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
29948 INSN_UID (insn), min_size);
29950 && GET_CODE (PATTERN (insn)) != ADDR_VEC
29951 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
29959 start = NEXT_INSN (start);
29960 if ((JUMP_P (start)
29961 && GET_CODE (PATTERN (start)) != ADDR_VEC
29962 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29964 njumps--, isjump = 1;
29967 nbytes -= min_insn_size (start);
29969 gcc_assert (njumps >= 0);
29971 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
29972 INSN_UID (start), INSN_UID (insn), nbytes);
29974 if (njumps == 3 && isjump && nbytes < 16)
29976 int padsize = 15 - nbytes + min_insn_size (insn);
29979 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
29980 INSN_UID (insn), padsize);
29981 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
29987 /* AMD Athlon works faster
29988 when RET is not the destination of a conditional jump or directly preceded
29989 by another jump instruction. We avoid the penalty by inserting a NOP just
29990 before the RET instruction in such cases. */
29992 ix86_pad_returns (void)
29997 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29999 basic_block bb = e->src;
30000 rtx ret = BB_END (bb);
30002 bool replace = false;
30004 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
30005 || optimize_bb_for_size_p (bb))
30007 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
30008 if (active_insn_p (prev) || LABEL_P (prev))
30010 if (prev && LABEL_P (prev))
30015 FOR_EACH_EDGE (e, ei, bb->preds)
30016 if (EDGE_FREQUENCY (e) && e->src->index >= 0
30017 && !(e->flags & EDGE_FALLTHRU))
30022 prev = prev_active_insn (ret);
30024 && ((JUMP_P (prev) && any_condjump_p (prev))
30027 /* Empty functions get a branch mispredict even when
30028 the jump destination is not visible to us. */
30029 if (!prev && !optimize_function_for_size_p (cfun))
30034 emit_jump_insn_before (gen_return_internal_long (), ret);
30040 /* Count the minimum number of instructions in BB. Return 4 if the
30041 number of instructions >= 4. */
30044 ix86_count_insn_bb (basic_block bb)
30047 int insn_count = 0;
30049 /* Count number of instructions in this block. Return 4 if the number
30050 of instructions >= 4. */
30051 FOR_BB_INSNS (bb, insn)
30053 /* This only happens in exit blocks. */
30055 && GET_CODE (PATTERN (insn)) == RETURN)
30058 if (NONDEBUG_INSN_P (insn)
30059 && GET_CODE (PATTERN (insn)) != USE
30060 && GET_CODE (PATTERN (insn)) != CLOBBER)
30063 if (insn_count >= 4)
30072 /* Count the minimum number of instructions in a code path ending in BB.
30073 Return 4 if the number of instructions >= 4. */
30076 ix86_count_insn (basic_block bb)
30080 int min_prev_count;
30082 /* Only bother counting instructions along paths with no
30083 more than 2 basic blocks between entry and exit. Given
30084 that BB has an edge to exit, determine if a predecessor
30085 of BB has an edge from entry. If so, compute the number
30086 of instructions in the predecessor block. If there
30087 happen to be multiple such blocks, compute the minimum. */
30088 min_prev_count = 4;
30089 FOR_EACH_EDGE (e, ei, bb->preds)
30092 edge_iterator prev_ei;
30094 if (e->src == ENTRY_BLOCK_PTR)
30096 min_prev_count = 0;
30099 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
30101 if (prev_e->src == ENTRY_BLOCK_PTR)
30103 int count = ix86_count_insn_bb (e->src);
30104 if (count < min_prev_count)
30105 min_prev_count = count;
30111 if (min_prev_count < 4)
30112 min_prev_count += ix86_count_insn_bb (bb);
30114 return min_prev_count;
30117 /* Pad a short function to 4 instructions. */
30120 ix86_pad_short_function (void)
30125 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30127 rtx ret = BB_END (e->src);
30128 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30130 int insn_count = ix86_count_insn (e->src);
30132 /* Pad short function. */
30133 if (insn_count < 4)
30137 /* Find epilogue. */
30140 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30141 insn = PREV_INSN (insn);
30146 /* Two NOPs count as one instruction. */
30147 insn_count = 2 * (4 - insn_count);
30148 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
30154 /* Implement machine specific optimizations. We implement padding of returns
30155 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
30159 /* We are freeing block_for_insn in the toplev to keep compatibility
30160 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30161 compute_bb_for_insn ();
30163 /* Run the vzeroupper optimization if needed. */
30164 if (TARGET_VZEROUPPER)
30165 move_or_delete_vzeroupper ();
30167 if (optimize && optimize_function_for_speed_p (cfun))
30169 if (TARGET_PAD_SHORT_FUNCTION)
30170 ix86_pad_short_function ();
30171 else if (TARGET_PAD_RETURNS)
30172 ix86_pad_returns ();
30173 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30174 if (TARGET_FOUR_JUMP_LIMIT)
30175 ix86_avoid_jump_mispredicts ();
30180 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
30183 x86_extended_QIreg_mentioned_p (rtx insn)
30186 extract_insn_cached (insn);
30187 for (i = 0; i < recog_data.n_operands; i++)
30188 if (REG_P (recog_data.operand[i])
30189 && REGNO (recog_data.operand[i]) > BX_REG)
30194 /* Return nonzero when P points to a register encoded via a REX prefix.
30195 Called via for_each_rtx. */
30197 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30199 unsigned int regno;
30202 regno = REGNO (*p);
30203 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30206 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
30209 x86_extended_reg_mentioned_p (rtx insn)
30211 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30212 extended_reg_mentioned_1, NULL);
30215 /* If profitable, negate (without causing overflow) integer constant
30216 of mode MODE at location LOC. Return true in this case. */
30218 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30222 if (!CONST_INT_P (*loc))
30228 /* DImode x86_64 constants must fit in 32 bits. */
30229 gcc_assert (x86_64_immediate_operand (*loc, mode));
30240 gcc_unreachable ();
30243 /* Avoid overflows. */
30244 if (mode_signbit_p (mode, *loc))
30247 val = INTVAL (*loc);
30249 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
30250 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
30251 if ((val < 0 && val != -128)
30254 *loc = GEN_INT (-val);
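/* Worked example of the -128 exception (editor's note): "addl
   $-128, %eax" encodes its immediate in a single byte, while the
   prettier "subl $128, %eax" would need a four-byte immediate, so
   negation is skipped exactly for -128 (and, symmetrically, applied
   for +128).  */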
30261 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30262 optabs would emit if we didn't have TFmode patterns. */
30265 x86_emit_floatuns (rtx operands[2])
30267 rtx neglab, donelab, i0, i1, f0, in, out;
30268 enum machine_mode mode, inmode;
30270 inmode = GET_MODE (operands[1]);
30271 gcc_assert (inmode == SImode || inmode == DImode);
30274 in = force_reg (inmode, operands[1]);
30275 mode = GET_MODE (out);
30276 neglab = gen_label_rtx ();
30277 donelab = gen_label_rtx ();
30278 f0 = gen_reg_rtx (mode);
30280 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30282 expand_float (out, in, 0);
30284 emit_jump_insn (gen_jump (donelab));
30287 emit_label (neglab);
30289 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30291 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30293 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30295 expand_float (f0, i0, 0);
30297 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30299 emit_label (donelab);
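/* Editor's sketch (not part of GCC) of the emitted logic in scalar C,
   for the DImode case:

     double
     floatuns_sketch (unsigned long long u)
     {
       if ((long long) u >= 0)
         return (double) (long long) u;
       unsigned long long h = (u >> 1) | (u & 1);
       double f = (double) (long long) h;
       return f + f;
     }

   The low bit is ORed back into the halved value so that the final
   rounding happens in the signed conversion, keeping the doubled
   result correctly rounded.  */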
30302 /* AVX does not support 32-byte integer vector operations,
30303 thus the longest vector we are faced with is V16QImode. */
30304 #define MAX_VECT_LEN 16
30306 struct expand_vec_perm_d
30308 rtx target, op0, op1;
30309 unsigned char perm[MAX_VECT_LEN];
30310 enum machine_mode vmode;
30311 unsigned char nelt;
30315 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30316 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30318 /* Get a vector mode of the same size as the original but with elements
30319 twice as wide. This is only guaranteed to apply to integral vectors. */
30321 static inline enum machine_mode
30322 get_mode_wider_vector (enum machine_mode o)
30324 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30325 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30326 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30327 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30331 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30332 with all elements equal to VAR. Return true if successful. */
30335 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30336 rtx target, rtx val)
30359 /* First attempt to recognize VAL as-is. */
30360 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30361 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30362 if (recog_memoized (insn) < 0)
30365 /* If that fails, force VAL into a register. */
30368 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30369 seq = get_insns ();
30372 emit_insn_before (seq, insn);
30374 ok = recog_memoized (insn) >= 0;
30383 if (TARGET_SSE || TARGET_3DNOW_A)
30387 val = gen_lowpart (SImode, val);
30388 x = gen_rtx_TRUNCATE (HImode, val);
30389 x = gen_rtx_VEC_DUPLICATE (mode, x);
30390 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30403 struct expand_vec_perm_d dperm;
30407 memset (&dperm, 0, sizeof (dperm));
30408 dperm.target = target;
30409 dperm.vmode = mode;
30410 dperm.nelt = GET_MODE_NUNITS (mode);
30411 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30413 /* Extend to SImode using a paradoxical SUBREG. */
30414 tmp1 = gen_reg_rtx (SImode);
30415 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30417 /* Insert the SImode value as low element of a V4SImode vector. */
30418 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30419 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30421 ok = (expand_vec_perm_1 (&dperm)
30422 || expand_vec_perm_broadcast_1 (&dperm));
30434 /* Replicate the value once into the next wider mode and recurse. */
30436 enum machine_mode smode, wsmode, wvmode;
30439 smode = GET_MODE_INNER (mode);
30440 wvmode = get_mode_wider_vector (mode);
30441 wsmode = GET_MODE_INNER (wvmode);
30443 val = convert_modes (wsmode, smode, val, true);
30444 x = expand_simple_binop (wsmode, ASHIFT, val,
30445 GEN_INT (GET_MODE_BITSIZE (smode)),
30446 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30447 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30449 x = gen_lowpart (wvmode, target);
30450 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
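/* Worked example (illustrative): broadcasting the QImode value 0xab
   builds the HImode value (0xab << 8) | 0xab = 0xabab first, so a
   V16QImode broadcast reduces to a V8HImode broadcast of 0xabab, and
   so on until a directly supported element width is reached.  */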
30458 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30459 rtx x = gen_reg_rtx (hvmode);
30461 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30464 x = gen_rtx_VEC_CONCAT (mode, x, x);
30465 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30474 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30475 whose ONE_VAR element is VAR, and other elements are zero. Return true
30479 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30480 rtx target, rtx var, int one_var)
30482 enum machine_mode vsimode;
30485 bool use_vector_set = false;
30490 /* For SSE4.1, we normally use vector set. But if the second
30491 element is zero and inter-unit moves are OK, we use movq instead. */
30493 use_vector_set = (TARGET_64BIT
30495 && !(TARGET_INTER_UNIT_MOVES
30501 use_vector_set = TARGET_SSE4_1;
30504 use_vector_set = TARGET_SSE2;
30507 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30514 use_vector_set = TARGET_AVX;
30517 /* Use ix86_expand_vector_set in 64bit mode only. */
30518 use_vector_set = TARGET_AVX && TARGET_64BIT;
30524 if (use_vector_set)
30526 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30527 var = force_reg (GET_MODE_INNER (mode), var);
30528 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30544 var = force_reg (GET_MODE_INNER (mode), var);
30545 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30546 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30551 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30552 new_target = gen_reg_rtx (mode);
30554 new_target = target;
30555 var = force_reg (GET_MODE_INNER (mode), var);
30556 x = gen_rtx_VEC_DUPLICATE (mode, var);
30557 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30558 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30561 /* We need to shuffle the value to the correct position, so
30562 create a new pseudo to store the intermediate result. */
30564 /* With SSE2, we can use the integer shuffle insns. */
30565 if (mode != V4SFmode && TARGET_SSE2)
30567 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30569 GEN_INT (one_var == 1 ? 0 : 1),
30570 GEN_INT (one_var == 2 ? 0 : 1),
30571 GEN_INT (one_var == 3 ? 0 : 1)));
30572 if (target != new_target)
30573 emit_move_insn (target, new_target);
30577 /* Otherwise convert the intermediate result to V4SFmode and
30578 use the SSE1 shuffle instructions. */
30579 if (mode != V4SFmode)
30581 tmp = gen_reg_rtx (V4SFmode);
30582 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30587 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30589 GEN_INT (one_var == 1 ? 0 : 1),
30590 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30591 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30593 if (mode != V4SFmode)
30594 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30595 else if (tmp != target)
30596 emit_move_insn (target, tmp);
30598 else if (target != new_target)
30599 emit_move_insn (target, new_target);
30604 vsimode = V4SImode;
30610 vsimode = V2SImode;
30616 /* Zero extend the variable element to SImode and recurse. */
30617 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30619 x = gen_reg_rtx (vsimode);
30620 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30622 gcc_unreachable ();
30624 emit_move_insn (target, gen_lowpart (mode, x));
30632 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30633 consisting of the values in VALS. It is known that all elements
30634 except ONE_VAR are constants. Return true if successful. */
30637 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30638 rtx target, rtx vals, int one_var)
30640 rtx var = XVECEXP (vals, 0, one_var);
30641 enum machine_mode wmode;
30644 const_vec = copy_rtx (vals);
30645 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30646 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30654 /* For the two element vectors, it's just as easy to use
30655 the general case. */
30659 /* Use ix86_expand_vector_set in 64bit mode only. */
30682 /* There's no way to set one QImode entry easily. Combine
30683 the variable value with its adjacent constant value, and
30684 promote to an HImode set. */
30685 x = XVECEXP (vals, 0, one_var ^ 1);
30688 var = convert_modes (HImode, QImode, var, true);
30689 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30690 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30691 x = GEN_INT (INTVAL (x) & 0xff);
30695 var = convert_modes (HImode, QImode, var, true);
30696 x = gen_int_mode (INTVAL (x) << 8, HImode);
30698 if (x != const0_rtx)
30699 var = expand_simple_binop (HImode, IOR, var, x, var,
30700 1, OPTAB_LIB_WIDEN);
30702 x = gen_reg_rtx (wmode);
30703 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30704 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30706 emit_move_insn (target, gen_lowpart (mode, x));
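/* Worked example (illustrative; the branch conditions are elided
   above): for V16QImode with ONE_VAR == 5, the variable byte and
   constant element 4 are fused into HImode element 5 >> 1 == 2; on
   little-endian x86 the odd element supplies the high byte, so the
   word inserted is (var << 8) | (c4 & 0xff).  */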
30713 emit_move_insn (target, const_vec);
30714 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30718 /* A subroutine of ix86_expand_vector_init_general. Use vector
30719 concatenate to handle the most general case: all values variable,
30720 and none identical. */
30723 ix86_expand_vector_init_concat (enum machine_mode mode,
30724 rtx target, rtx *ops, int n)
30726 enum machine_mode cmode, hmode = VOIDmode;
30727 rtx first[8], second[4];
30767 gcc_unreachable ();
30770 if (!register_operand (ops[1], cmode))
30771 ops[1] = force_reg (cmode, ops[1]);
30772 if (!register_operand (ops[0], cmode))
30773 ops[0] = force_reg (cmode, ops[0]);
30774 emit_insn (gen_rtx_SET (VOIDmode, target,
30775 gen_rtx_VEC_CONCAT (mode, ops[0],
30795 gcc_unreachable ();
30811 gcc_unreachable ();
30816 /* FIXME: We process inputs backward to help RA. PR 36222. */
30819 for (; i > 0; i -= 2, j--)
30821 first[j] = gen_reg_rtx (cmode);
30822 v = gen_rtvec (2, ops[i - 1], ops[i]);
30823 ix86_expand_vector_init (false, first[j],
30824 gen_rtx_PARALLEL (cmode, v));
30830 gcc_assert (hmode != VOIDmode);
30831 for (i = j = 0; i < n; i += 2, j++)
30833 second[j] = gen_reg_rtx (hmode);
30834 ix86_expand_vector_init_concat (hmode, second [j],
30838 ix86_expand_vector_init_concat (mode, target, second, n);
30841 ix86_expand_vector_init_concat (mode, target, first, n);
30845 gcc_unreachable ();
30849 /* A subroutine of ix86_expand_vector_init_general. Use vector
30850 interleave to handle the most general case: all values variable,
30851 and none identical. */
30854 ix86_expand_vector_init_interleave (enum machine_mode mode,
30855 rtx target, rtx *ops, int n)
30857 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
30860 rtx (*gen_load_even) (rtx, rtx, rtx);
30861 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
30862 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
30867 gen_load_even = gen_vec_setv8hi;
30868 gen_interleave_first_low = gen_vec_interleave_lowv4si;
30869 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30870 inner_mode = HImode;
30871 first_imode = V4SImode;
30872 second_imode = V2DImode;
30873 third_imode = VOIDmode;
30876 gen_load_even = gen_vec_setv16qi;
30877 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
30878 gen_interleave_second_low = gen_vec_interleave_lowv4si;
30879 inner_mode = QImode;
30880 first_imode = V8HImode;
30881 second_imode = V4SImode;
30882 third_imode = V2DImode;
30885 gcc_unreachable ();
30888 for (i = 0; i < n; i++)
30890 /* Extend the odd element to SImode using a paradoxical SUBREG. */
30891 op0 = gen_reg_rtx (SImode);
30892 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
30894 /* Insert the SImode value as low element of V4SImode vector. */
30895 op1 = gen_reg_rtx (V4SImode);
30896 op0 = gen_rtx_VEC_MERGE (V4SImode,
30897 gen_rtx_VEC_DUPLICATE (V4SImode,
30899 CONST0_RTX (V4SImode),
30901 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
30903 /* Cast the V4SImode vector back to a vector in the original mode. */
30904 op0 = gen_reg_rtx (mode);
30905 emit_move_insn (op0, gen_lowpart (mode, op1));
30907 /* Load even elements into the second position. */
30908 emit_insn (gen_load_even (op0,
30909 force_reg (inner_mode,
30913 /* Cast vector to FIRST_IMODE vector. */
30914 ops[i] = gen_reg_rtx (first_imode);
30915 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
30918 /* Interleave low FIRST_IMODE vectors. */
30919 for (i = j = 0; i < n; i += 2, j++)
30921 op0 = gen_reg_rtx (first_imode);
30922 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
30924 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30925 ops[j] = gen_reg_rtx (second_imode);
30926 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
30929 /* Interleave low SECOND_IMODE vectors. */
30930 switch (second_imode)
30933 for (i = j = 0; i < n / 2; i += 2, j++)
30935 op0 = gen_reg_rtx (second_imode);
30936 emit_insn (gen_interleave_second_low (op0, ops[i],
30939 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
30941 ops[j] = gen_reg_rtx (third_imode);
30942 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
30944 second_imode = V2DImode;
30945 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30949 op0 = gen_reg_rtx (second_imode);
30950 emit_insn (gen_interleave_second_low (op0, ops[0],
30953 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
30955 emit_insn (gen_rtx_SET (VOIDmode, target,
30956 gen_lowpart (mode, op0)));
30960 gcc_unreachable ();
30964 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30965 all values variable, and none identical. */
30968 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
30969 rtx target, rtx vals)
30971 rtx ops[32], op0, op1;
30972 enum machine_mode half_mode = VOIDmode;
30979 if (!mmx_ok && !TARGET_SSE)
30991 n = GET_MODE_NUNITS (mode);
30992 for (i = 0; i < n; i++)
30993 ops[i] = XVECEXP (vals, 0, i);
30994 ix86_expand_vector_init_concat (mode, target, ops, n);
30998 half_mode = V16QImode;
31002 half_mode = V8HImode;
31006 n = GET_MODE_NUNITS (mode);
31007 for (i = 0; i < n; i++)
31008 ops[i] = XVECEXP (vals, 0, i);
31009 op0 = gen_reg_rtx (half_mode);
31010 op1 = gen_reg_rtx (half_mode);
31011 ix86_expand_vector_init_interleave (half_mode, op0, ops,
31013 ix86_expand_vector_init_interleave (half_mode, op1,
31014 &ops [n >> 1], n >> 2);
31015 emit_insn (gen_rtx_SET (VOIDmode, target,
31016 gen_rtx_VEC_CONCAT (mode, op0, op1)));
31020 if (!TARGET_SSE4_1)
31028 /* Don't use ix86_expand_vector_init_interleave if we can't
31029 move from GPR to SSE register directly. */
31030 if (!TARGET_INTER_UNIT_MOVES)
31033 n = GET_MODE_NUNITS (mode);
31034 for (i = 0; i < n; i++)
31035 ops[i] = XVECEXP (vals, 0, i);
31036 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
31044 gcc_unreachable ();
31048 int i, j, n_elts, n_words, n_elt_per_word;
31049 enum machine_mode inner_mode;
31050 rtx words[4], shift;
31052 inner_mode = GET_MODE_INNER (mode);
31053 n_elts = GET_MODE_NUNITS (mode);
31054 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
31055 n_elt_per_word = n_elts / n_words;
31056 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
31058 for (i = 0; i < n_words; ++i)
31060 rtx word = NULL_RTX;
31062 for (j = 0; j < n_elt_per_word; ++j)
31064 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
31065 elt = convert_modes (word_mode, inner_mode, elt, true);
31071 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
31072 word, 1, OPTAB_LIB_WIDEN);
31073 word = expand_simple_binop (word_mode, IOR, word, elt,
31074 word, 1, OPTAB_LIB_WIDEN);
31082 emit_move_insn (target, gen_lowpart (mode, words[0]));
31083 else if (n_words == 2)
31085 rtx tmp = gen_reg_rtx (mode);
31086 emit_clobber (tmp);
31087 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
31088 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
31089 emit_move_insn (target, tmp);
31091 else if (n_words == 4)
31093 rtx tmp = gen_reg_rtx (V4SImode);
31094 gcc_assert (word_mode == SImode);
31095 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
31096 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
31097 emit_move_insn (target, gen_lowpart (mode, tmp));
31100 gcc_unreachable ();
31104 /* Initialize vector TARGET via VALS. Suppress the use of MMX
31105 instructions unless MMX_OK is true. */
31108 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
31110 enum machine_mode mode = GET_MODE (target);
31111 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31112 int n_elts = GET_MODE_NUNITS (mode);
31113 int n_var = 0, one_var = -1;
31114 bool all_same = true, all_const_zero = true;
31118 for (i = 0; i < n_elts; ++i)
31120 x = XVECEXP (vals, 0, i);
31121 if (!(CONST_INT_P (x)
31122 || GET_CODE (x) == CONST_DOUBLE
31123 || GET_CODE (x) == CONST_FIXED))
31124 n_var++, one_var = i;
31125 else if (x != CONST0_RTX (inner_mode))
31126 all_const_zero = false;
31127 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31131 /* Constants are best loaded from the constant pool. */
31134 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31138 /* If all values are identical, broadcast the value. */
31140 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31141 XVECEXP (vals, 0, 0)))
31144 /* Values where only one field is non-constant are best loaded from
31145 the pool and overwritten via move later. */
31149 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31150 XVECEXP (vals, 0, one_var),
31154 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31158 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31162 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31164 enum machine_mode mode = GET_MODE (target);
31165 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31166 enum machine_mode half_mode;
31167 bool use_vec_merge = false;
31169 static rtx (*gen_extract[6][2]) (rtx, rtx)
31171 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31172 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31173 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31174 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31175 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31176 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31178 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31180 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31181 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31182 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31183 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31184 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31185 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31195 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31196 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31198 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31200 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31201 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31207 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
31211 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31212 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
31214 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31216 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31217 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31224 /* For the two element vectors, we implement a VEC_CONCAT with
31225 the extraction of the other element. */
31227 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31228 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31231 op0 = val, op1 = tmp;
31233 op0 = tmp, op1 = val;
31235 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31236 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31241 use_vec_merge = TARGET_SSE4_1;
31248 use_vec_merge = true;
31252 /* tmp = target = A B C D */
31253 tmp = copy_to_reg (target);
31254 /* target = A A B B */
31255 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31256 /* target = X A B B */
31257 ix86_expand_vector_set (false, target, val, 0);
31258 /* target = A X C D */
31259 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31260 const1_rtx, const0_rtx,
31261 GEN_INT (2+4), GEN_INT (3+4)));
31265 /* tmp = target = A B C D */
31266 tmp = copy_to_reg (target);
31267 /* tmp = X B C D */
31268 ix86_expand_vector_set (false, tmp, val, 0);
31269 /* target = A B X D */
31270 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31271 const0_rtx, const1_rtx,
31272 GEN_INT (0+4), GEN_INT (3+4)));
31276 /* tmp = target = A B C D */
31277 tmp = copy_to_reg (target);
31278 /* tmp = X B C D */
31279 ix86_expand_vector_set (false, tmp, val, 0);
31280 /* target = A B X D */
31281 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31282 const0_rtx, const1_rtx,
31283 GEN_INT (2+4), GEN_INT (0+4)));
31287 gcc_unreachable ();
31292 use_vec_merge = TARGET_SSE4_1;
31296 /* Element 0 handled by vec_merge below. */
31299 use_vec_merge = true;
31305 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31306 store into element 0, then shuffle them back. */
31310 order[0] = GEN_INT (elt);
31311 order[1] = const1_rtx;
31312 order[2] = const2_rtx;
31313 order[3] = GEN_INT (3);
31314 order[elt] = const0_rtx;
31316 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31317 order[1], order[2], order[3]));
31319 ix86_expand_vector_set (false, target, val, 0);
31321 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31322 order[1], order[2], order[3]));
31326 /* For SSE1, we have to reuse the V4SF code. */
31327 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31328 gen_lowpart (SFmode, val), elt);
31333 use_vec_merge = TARGET_SSE2;
31336 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31340 use_vec_merge = TARGET_SSE4_1;
31347 half_mode = V16QImode;
31353 half_mode = V8HImode;
31359 half_mode = V4SImode;
31365 half_mode = V2DImode;
31371 half_mode = V4SFmode;
31377 half_mode = V2DFmode;
31383 /* Compute offset. */
31387 gcc_assert (i <= 1);
31389 /* Extract the half. */
31390 tmp = gen_reg_rtx (half_mode);
31391 emit_insn (gen_extract[j][i] (tmp, target));
31393 /* Put val in tmp at elt. */
31394 ix86_expand_vector_set (false, tmp, val, elt);
31397 emit_insn (gen_insert[j][i] (target, target, tmp));
31406 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31407 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31408 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31412 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31414 emit_move_insn (mem, target);
31416 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31417 emit_move_insn (tmp, val);
31419 emit_move_insn (target, mem);
31424 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31426 enum machine_mode mode = GET_MODE (vec);
31427 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31428 bool use_vec_extr = false;
31441 use_vec_extr = true;
31445 use_vec_extr = TARGET_SSE4_1;
31457 tmp = gen_reg_rtx (mode);
31458 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31459 GEN_INT (elt), GEN_INT (elt),
31460 GEN_INT (elt+4), GEN_INT (elt+4)));
31464 tmp = gen_reg_rtx (mode);
31465 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31469 gcc_unreachable ();
31472 use_vec_extr = true;
31477 use_vec_extr = TARGET_SSE4_1;
31491 tmp = gen_reg_rtx (mode);
31492 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31493 GEN_INT (elt), GEN_INT (elt),
31494 GEN_INT (elt), GEN_INT (elt)));
31498 tmp = gen_reg_rtx (mode);
31499 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31503 gcc_unreachable ();
31506 use_vec_extr = true;
31511 /* For SSE1, we have to reuse the V4SF code. */
31512 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31513 gen_lowpart (V4SFmode, vec), elt);
31519 use_vec_extr = TARGET_SSE2;
31522 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31526 use_vec_extr = TARGET_SSE4_1;
31530 /* ??? Could extract the appropriate HImode element and shift. */
31537 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31538 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31540 /* Let the rtl optimizers know about the zero extension performed. */
31541 if (inner_mode == QImode || inner_mode == HImode)
31543 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31544 target = gen_lowpart (SImode, target);
31547 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31551 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31553 emit_move_insn (mem, vec);
31555 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31556 emit_move_insn (target, tmp);
31560 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31561 pattern to reduce; DEST is the destination; IN is the input vector. */
31564 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31566 rtx tmp1, tmp2, tmp3;
31568 tmp1 = gen_reg_rtx (V4SFmode);
31569 tmp2 = gen_reg_rtx (V4SFmode);
31570 tmp3 = gen_reg_rtx (V4SFmode);
31572 emit_insn (gen_sse_movhlps (tmp1, in, in));
31573 emit_insn (fn (tmp2, tmp1, in));
31575 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31576 const1_rtx, const1_rtx,
31577 GEN_INT (1+4), GEN_INT (1+4)));
31578 emit_insn (fn (dest, tmp2, tmp3));
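/* Editor's sketch (not part of GCC): with IN = [a b c d] the sequence
   above computes

     tmp1 = movhlps (in, in)         = [c d . .]
     tmp2 = fn (tmp1, in)            = [f(c,a) f(d,b) . .]
     tmp3 = splat of tmp2 element 1  = [f(d,b) f(d,b) . .]
     dest = fn (tmp2, tmp3)

   whose element 0 is f(f(c,a), f(d,b)), the reduction of all four
   elements.  */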
31581 /* Target hook for scalar_mode_supported_p. */
31583 ix86_scalar_mode_supported_p (enum machine_mode mode)
31585 if (DECIMAL_FLOAT_MODE_P (mode))
31586 return default_decimal_float_supported_p ();
31587 else if (mode == TFmode)
31590 return default_scalar_mode_supported_p (mode);
31593 /* Implements target hook vector_mode_supported_p. */
31595 ix86_vector_mode_supported_p (enum machine_mode mode)
31597 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31599 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31601 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31603 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31605 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31610 /* Target hook for c_mode_for_suffix. */
31611 static enum machine_mode
31612 ix86_c_mode_for_suffix (char suffix)
31622 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31624 We do this in the new i386 backend to maintain source compatibility
31625 with the old cc0-based compiler. */
31628 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31629 tree inputs ATTRIBUTE_UNUSED,
31632 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31634 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31639 /* Implements the target hook targetm.asm.encode_section_info. */
31641 static void ATTRIBUTE_UNUSED
31642 ix86_encode_section_info (tree decl, rtx rtl, int first)
31644 default_encode_section_info (decl, rtl, first);
31646 if (TREE_CODE (decl) == VAR_DECL
31647 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31648 && ix86_in_large_data_p (decl))
31649 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31652 /* Worker function for REVERSE_CONDITION. */
31655 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31657 return (mode != CCFPmode && mode != CCFPUmode
31658 ? reverse_condition (code)
31659 : reverse_condition_maybe_unordered (code));
31662 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
31666 output_387_reg_move (rtx insn, rtx *operands)
31668 if (REG_P (operands[0]))
31670 if (REG_P (operands[1])
31671 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31673 if (REGNO (operands[0]) == FIRST_STACK_REG)
31674 return output_387_ffreep (operands, 0);
31675 return "fstp\t%y0";
31677 if (STACK_TOP_P (operands[0]))
31678 return "fld%Z1\t%y1";
31681 else if (MEM_P (operands[0]))
31683 gcc_assert (REG_P (operands[1]));
31684 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31685 return "fstp%Z0\t%y0";
31688 /* There is no non-popping store to memory for XFmode.
31689 So if we need one, follow the store with a load. */
31690 if (GET_MODE (operands[0]) == XFmode)
31691 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31693 return "fst%Z0\t%y0";
31700 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
31701 the FP status register is set. */
31704 ix86_emit_fp_unordered_jump (rtx label)
31706 rtx reg = gen_reg_rtx (HImode);
31709 emit_insn (gen_x86_fnstsw_1 (reg));
31711 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31713 emit_insn (gen_x86_sahf_1 (reg));
31715 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31716 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31720 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31722 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31723 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31726 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31727 gen_rtx_LABEL_REF (VOIDmode, label),
31729 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31731 emit_jump_insn (temp);
31732 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31735 /* Output code to perform a log1p XFmode calculation. */
31737 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31739 rtx label1 = gen_label_rtx ();
31740 rtx label2 = gen_label_rtx ();
31742 rtx tmp = gen_reg_rtx (XFmode);
31743 rtx tmp2 = gen_reg_rtx (XFmode);
31746 emit_insn (gen_absxf2 (tmp, op1));
31747 test = gen_rtx_GE (VOIDmode, tmp,
31748 CONST_DOUBLE_FROM_REAL_VALUE (
31749 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31751 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31753 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31754 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31755 emit_jump (label2);
31757 emit_label (label1);
31758 emit_move_insn (tmp, CONST1_RTX (XFmode));
31759 emit_insn (gen_addxf3 (tmp, op1, tmp));
31760 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31761 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31763 emit_label (label2);
31766 /* Emit code for round calculation. */
31767 void ix86_emit_i387_round (rtx op0, rtx op1)
31769 enum machine_mode inmode = GET_MODE (op1);
31770 enum machine_mode outmode = GET_MODE (op0);
31771 rtx e1, e2, res, tmp, tmp1, half;
31772 rtx scratch = gen_reg_rtx (HImode);
31773 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
31774 rtx jump_label = gen_label_rtx ();
31776 rtx (*gen_abs) (rtx, rtx);
31777 rtx (*gen_neg) (rtx, rtx);
31782 gen_abs = gen_abssf2;
31785 gen_abs = gen_absdf2;
31788 gen_abs = gen_absxf2;
31791 gcc_unreachable ();
31797 gen_neg = gen_negsf2;
31800 gen_neg = gen_negdf2;
31803 gen_neg = gen_negxf2;
31806 gen_neg = gen_neghi2;
31809 gen_neg = gen_negsi2;
31812 gen_neg = gen_negdi2;
31815 gcc_unreachable ();
31818 e1 = gen_reg_rtx (inmode);
31819 e2 = gen_reg_rtx (inmode);
31820 res = gen_reg_rtx (outmode);
31822 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
31824 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
31826 /* scratch = fxam(op1) */
31827 emit_insn (gen_rtx_SET (VOIDmode, scratch,
31828 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
31830 /* e1 = fabs(op1) */
31831 emit_insn (gen_abs (e1, op1));
31833 /* e2 = e1 + 0.5 */
31834 half = force_reg (inmode, half);
31835 emit_insn (gen_rtx_SET (VOIDmode, e2,
31836 gen_rtx_PLUS (inmode, e1, half)));
31838 /* res = floor(e2) */
31839 if (inmode != XFmode)
31841 tmp1 = gen_reg_rtx (XFmode);
31843 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
31844 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
31854 rtx tmp0 = gen_reg_rtx (XFmode);
31856 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
31858 emit_insn (gen_rtx_SET (VOIDmode, res,
31859 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
31860 UNSPEC_TRUNC_NOOP)));
31864 emit_insn (gen_frndintxf2_floor (res, tmp1));
31867 emit_insn (gen_lfloorxfhi2 (res, tmp1));
31870 emit_insn (gen_lfloorxfsi2 (res, tmp1));
31873 emit_insn (gen_lfloorxfdi2 (res, tmp1));
31876 gcc_unreachable ();
31879 /* flags = signbit(a) */
31880 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
31882 /* if (flags) then res = -res */
31883 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
31884 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
31885 gen_rtx_LABEL_REF (VOIDmode, jump_label),
31887 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
31888 predict_jump (REG_BR_PROB_BASE * 50 / 100);
31889 JUMP_LABEL (insn) = jump_label;
31891 emit_insn (gen_neg (res, res));
31893 emit_label (jump_label);
31894 LABEL_NUSES (jump_label) = 1;
31896 emit_move_insn (op0, res);
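/* Editor's sketch (not part of GCC) of the sequence above in C:

     res = floor (fabs (op1) + 0.5);
     if (signbit (op1))
       res = -res;

   The sign is read up front with fxam, whose C1 bit (bit 9 of the
   status word, hence the 0x02 test on its high byte) holds the sign
   even for -0.0.  */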
31899 /* Output code to perform a Newton-Raphson approximation of a single precision
31900 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31902 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31904 rtx x0, x1, e0, e1;
31906 x0 = gen_reg_rtx (mode);
31907 e0 = gen_reg_rtx (mode);
31908 e1 = gen_reg_rtx (mode);
31909 x1 = gen_reg_rtx (mode);
31911 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))) */
31913 /* x0 = rcp(b) estimate */
31914 emit_insn (gen_rtx_SET (VOIDmode, x0,
31915 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31918 emit_insn (gen_rtx_SET (VOIDmode, e0,
31919 gen_rtx_MULT (mode, x0, b)));
31922 emit_insn (gen_rtx_SET (VOIDmode, e0,
31923 gen_rtx_MULT (mode, x0, e0)));
31926 emit_insn (gen_rtx_SET (VOIDmode, e1,
31927 gen_rtx_PLUS (mode, x0, x0)));
31930 emit_insn (gen_rtx_SET (VOIDmode, x1,
31931 gen_rtx_MINUS (mode, e1, e0)));
31934 emit_insn (gen_rtx_SET (VOIDmode, res,
31935 gen_rtx_MULT (mode, a, x1)));
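/* Why this refines the estimate (editor's note): for f(x) = 1/x - b
   a Newton-Raphson step from x0 ~ rcp(b) is

     x1 = x0 * (2 - b*x0) = (x0 + x0) - b * x0 * x0,

   which is exactly the e1 - e0 computed above.  The step squares the
   relative error, so the ~12-bit rcpss estimate reaches roughly
   single-precision accuracy before the final multiply by a.  */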
31938 /* Output code to perform a Newton-Raphson approximation of a
31939 single precision floating point [reciprocal] square root. */
31941 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31944 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31947 x0 = gen_reg_rtx (mode);
31948 e0 = gen_reg_rtx (mode);
31949 e1 = gen_reg_rtx (mode);
31950 e2 = gen_reg_rtx (mode);
31951 e3 = gen_reg_rtx (mode);
31953 real_from_integer (&r, VOIDmode, -3, -1, 0);
31954 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31956 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31957 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31959 if (VECTOR_MODE_P (mode))
31961 mthree = ix86_build_const_vector (mode, true, mthree);
31962 mhalf = ix86_build_const_vector (mode, true, mhalf);
31965 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31966 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
31968 /* x0 = rsqrt(a) estimate */
31969 emit_insn (gen_rtx_SET (VOIDmode, x0,
31970 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
31973 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN for sqrt(0.0). */
31978 zero = gen_reg_rtx (mode);
31979 mask = gen_reg_rtx (mode);
31981 zero = force_reg (mode, CONST0_RTX(mode));
31982 emit_insn (gen_rtx_SET (VOIDmode, mask,
31983 gen_rtx_NE (mode, zero, a)));
31985 emit_insn (gen_rtx_SET (VOIDmode, x0,
31986 gen_rtx_AND (mode, x0, mask)));
31990 emit_insn (gen_rtx_SET (VOIDmode, e0,
31991 gen_rtx_MULT (mode, x0, a)));
31993 emit_insn (gen_rtx_SET (VOIDmode, e1,
31994 gen_rtx_MULT (mode, e0, x0)));
31997 mthree = force_reg (mode, mthree);
31998 emit_insn (gen_rtx_SET (VOIDmode, e2,
31999 gen_rtx_PLUS (mode, e1, mthree)));
32001 mhalf = force_reg (mode, mhalf);
32003 /* e3 = -.5 * x0 */
32004 emit_insn (gen_rtx_SET (VOIDmode, e3,
32005 gen_rtx_MULT (mode, x0, mhalf)));
32007 /* e3 = -.5 * e0 */
32008 emit_insn (gen_rtx_SET (VOIDmode, e3,
32009 gen_rtx_MULT (mode, e0, mhalf)));
32010 /* ret = e2 * e3 */
32011 emit_insn (gen_rtx_SET (VOIDmode, res,
32012 gen_rtx_MULT (mode, e2, e3)));
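/* Why this works (editor's note): for f(x) = 1/(x*x) - a the
   Newton-Raphson step from x0 ~ rsqrt(a) is

     x1 = 0.5 * x0 * (3 - a*x0*x0) = -0.5 * x0 * (a*x0*x0 - 3),

   which is the e3 * e2 product above; for sqrt(a) = a * rsqrt(a) the
   x0 factor in e3 is replaced by e0 = a * x0.  One step lifts the
   ~12-bit rsqrtss estimate to roughly single-precision accuracy.  */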
32015 #ifdef TARGET_SOLARIS
32016 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
32019 i386_solaris_elf_named_section (const char *name, unsigned int flags,
32022 /* With Binutils 2.15, the "@unwind" marker must be specified on
32023 every occurrence of the ".eh_frame" section, not just the first one. */
32026 && strcmp (name, ".eh_frame") == 0)
32028 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
32029 flags & SECTION_WRITE ? "aw" : "a");
32034 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
32036 solaris_elf_asm_comdat_section (name, flags, decl);
32041 default_elf_asm_named_section (name, flags, decl);
32043 #endif /* TARGET_SOLARIS */
32045 /* Return the mangling of TYPE if it is an extended fundamental type. */
32047 static const char *
32048 ix86_mangle_type (const_tree type)
32050 type = TYPE_MAIN_VARIANT (type);
32052 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32053 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32056 switch (TYPE_MODE (type))
32059 /* __float128 is "g". */
32062 /* "long double" or __float80 is "e". */
32069 /* For 32-bit code we can save PIC register setup by using
32070 __stack_chk_fail_local hidden function instead of calling
32071 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
32072 register, so it is better to call __stack_chk_fail directly. */
32074 static tree ATTRIBUTE_UNUSED
32075 ix86_stack_protect_fail (void)
32077 return TARGET_64BIT
32078 ? default_external_stack_protect_fail ()
32079 : default_hidden_stack_protect_fail ();
32082 /* Select a format to encode pointers in exception handling data. CODE
32083 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
32084 true if the symbol may be affected by dynamic relocations.
32086 ??? All x86 object file formats are capable of representing this.
32087 After all, the relocation needed is the same as for the call insn.
32088 Whether or not a particular assembler allows us to enter such, I
32089 guess we'll have to see. */
32091 asm_preferred_eh_data_format (int code, int global)
32095 int type = DW_EH_PE_sdata8;
32097 || ix86_cmodel == CM_SMALL_PIC
32098 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
32099 type = DW_EH_PE_sdata4;
32100 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
32102 if (ix86_cmodel == CM_SMALL
32103 || (ix86_cmodel == CM_MEDIUM && code))
32104 return DW_EH_PE_udata4;
32105 return DW_EH_PE_absptr;
32108 /* Expand copysign from SIGN to the positive value ABS_VALUE
32109 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
32112 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
32114 enum machine_mode mode = GET_MODE (sign);
32115 rtx sgn = gen_reg_rtx (mode);
32116 if (mask == NULL_RTX)
32118 enum machine_mode vmode;
32120 if (mode == SFmode)
32122 else if (mode == DFmode)
32127 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
32128 if (!VECTOR_MODE_P (mode))
32130 /* We need to generate a scalar mode mask in this case. */
32131 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32132 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32133 mask = gen_reg_rtx (mode);
32134 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32138 mask = gen_rtx_NOT (mode, mask);
32139 emit_insn (gen_rtx_SET (VOIDmode, sgn,
32140 gen_rtx_AND (mode, mask, sign)));
32141 emit_insn (gen_rtx_SET (VOIDmode, result,
32142 gen_rtx_IOR (mode, abs_value, sgn)));
32145 /* Expand fabs (OP0) and return a new rtx that holds the result. The
32146 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
32149 ix86_expand_sse_fabs (rtx op0, rtx *smask)
32151 enum machine_mode vmode, mode = GET_MODE (op0);
32154 xa = gen_reg_rtx (mode);
32155 if (mode == SFmode)
32157 else if (mode == DFmode)
32161 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
32162 if (!VECTOR_MODE_P (mode))
32164 /* We need to generate a scalar mode mask in this case. */
32165 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32166 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32167 mask = gen_reg_rtx (mode);
32168 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32170 emit_insn (gen_rtx_SET (VOIDmode, xa,
32171 gen_rtx_AND (mode, op0, mask)));
32179 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
32180 swapping the operands if SWAP_OPERANDS is true. The expanded
32181 code is a forward jump to a newly created label in case the
32182 comparison is true. The generated label rtx is returned. */
32184 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
32185 bool swap_operands)
32196 label = gen_label_rtx ();
32197 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
32198 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32199 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
32200 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
32201 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
32202 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
32203 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32204 JUMP_LABEL (tmp) = label;
32209 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
32210 using comparison code CODE. Operands are swapped for the comparison if
32211 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
32213 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
32214 bool swap_operands)
32216 rtx (*insn)(rtx, rtx, rtx, rtx);
32217 enum machine_mode mode = GET_MODE (op0);
32218 rtx mask = gen_reg_rtx (mode);
32227 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
32229 emit_insn (insn (mask, op0, op1,
32230 gen_rtx_fmt_ee (code, mode, op0, op1)));
32234 /* Generate and return a rtx of mode MODE for 2**n where n is the number
32235 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
32237 ix86_gen_TWO52 (enum machine_mode mode)
32239 REAL_VALUE_TYPE TWO52r;
32242 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
32243 TWO52 = const_double_from_real_value (TWO52r, mode);
32244 TWO52 = force_reg (mode, TWO52);
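/* Worked example of the 2**52 trick used by the expanders below
   (editor's note): for any |x| < 2**52 all fraction bits of x + 2**52
   fall below the mantissa, so the addition rounds x to an integer in
   the current rounding mode and subtracting 2**52 recovers it exactly,
   e.g. (3.7 + 2**52) - 2**52 == 4.0 under round-to-nearest.  */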
32249 /* Expand SSE sequence for computing lround from OP1 storing into OP0. */
32252 ix86_expand_lround (rtx op0, rtx op1)
32254 /* C code for the stuff we're doing below:
32255 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
32258 enum machine_mode mode = GET_MODE (op1);
32259 const struct real_format *fmt;
32260 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32263 /* load nextafter (0.5, 0.0) */
32264 fmt = REAL_MODE_FORMAT (mode);
32265 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32266 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32268 /* adj = copysign (0.5, op1) */
32269 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32270 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32272 /* adj = op1 + adj */
32273 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32275 /* op0 = (imode)adj */
32276 expand_fix (op0, adj, 0);
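/* Editor's sketch, not part of GCC: scalar equivalent of the sequence
   above, assuming the final cast truncates like expand_fix and that the
   result fits in a long.  Helper name is hypothetical.  */
#if 0
#include <math.h>

static long
lround_sketch (double x)
{
  /* nextafter (0.5, 0.0) is the largest double below 0.5; adding it and
     truncating rounds halfway cases away from zero while values just
     below a half-integer are left alone.  */
  double adj = copysign (nextafter (0.5, 0.0), x);
  return (long) (x + adj);
}
#endif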
32279 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing into OPERAND0.  */
32282 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
32284 /* C code for the stuff we're doing below (for do_floor):
32286 xi -= (double)xi > op1 ? 1 : 0;
32289 enum machine_mode fmode = GET_MODE (op1);
32290 enum machine_mode imode = GET_MODE (op0);
32291 rtx ireg, freg, label, tmp;
32293 /* reg = (long)op1 */
32294 ireg = gen_reg_rtx (imode);
32295 expand_fix (ireg, op1, 0);
32297 /* freg = (double)reg */
32298 freg = gen_reg_rtx (fmode);
32299 expand_float (freg, ireg, 0);
32301 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32302 label = ix86_expand_sse_compare_and_jump (UNLE,
32303 freg, op1, !do_floor);
32304 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32305 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32306 emit_move_insn (ireg, tmp);
32308 emit_label (label);
32309 LABEL_NUSES (label) = 1;
32311 emit_move_insn (op0, ireg);
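/* Editor's sketch, not part of GCC: the compare-and-adjust above for the
   do_floor case, assuming the value fits in a long.  */
#if 0
static long
lfloor_sketch (double x)
{
  long xi = (long) x;       /* truncates toward zero */
  if ((double) xi > x)      /* negative non-integers truncated upward */
    xi -= 1;
  return xi;
}
#endif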
32314 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32315 result in OPERAND0. */
32317 ix86_expand_rint (rtx operand0, rtx operand1)
32319 /* C code for the stuff we're doing below:
32320 xa = fabs (operand1);
32321 if (!isless (xa, 2**52))
32323 xa = xa + 2**52 - 2**52;
32324 return copysign (xa, operand1);
32326 enum machine_mode mode = GET_MODE (operand0);
32327 rtx res, xa, label, TWO52, mask;
32329 res = gen_reg_rtx (mode);
32330 emit_move_insn (res, operand1);
32332 /* xa = abs (operand1) */
32333 xa = ix86_expand_sse_fabs (res, &mask);
32335 /* if (!isless (xa, TWO52)) goto label; */
32336 TWO52 = ix86_gen_TWO52 (mode);
32337 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32339 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32340 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32342 ix86_sse_copysign_to_positive (res, xa, res, mask);
32344 emit_label (label);
32345 LABEL_NUSES (label) = 1;
32347 emit_move_insn (operand0, res);
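/* Editor's sketch, not part of GCC: the TWO52 trick above in standalone C.
   Adding and then subtracting 2**52 rounds |x| to an integer in the
   current rounding mode; assumes strict IEEE evaluation (no -ffast-math).  */
#if 0
#include <math.h>

static double
rint_sketch (double x)
{
  const double two52 = 4503599627370496.0;  /* 2**52 */
  double xa = fabs (x);
  if (!(xa < two52))        /* mirrors !isless (xa, TWO52) */
    return x;
  xa = xa + two52 - two52;
  return copysign (xa, x);  /* restore the sign, including -0.0 */
}
#endif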
32350 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0.  Sequence that works without relying on DImode truncation via cvttsd2siq that is only available on 64bit targets.  */
32353 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32355 /* C code for the stuff we expand below.
32356 double xa = fabs (x), x2;
32357 if (!isless (xa, TWO52))
32359 xa = xa + TWO52 - TWO52;
32360 x2 = copysign (xa, x);
32369 enum machine_mode mode = GET_MODE (operand0);
32370 rtx xa, TWO52, tmp, label, one, res, mask;
32372 TWO52 = ix86_gen_TWO52 (mode);
32374 /* Temporary for holding the result, initialized to the input
32375 operand to ease control flow. */
32376 res = gen_reg_rtx (mode);
32377 emit_move_insn (res, operand1);
32379 /* xa = abs (operand1) */
32380 xa = ix86_expand_sse_fabs (res, &mask);
32382 /* if (!isless (xa, TWO52)) goto label; */
32383 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32385 /* xa = xa + TWO52 - TWO52; */
32386 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32387 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32389 /* xa = copysign (xa, operand1) */
32390 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32392 /* generate 1.0 or -1.0 */
32393 one = force_reg (mode,
32394 const_double_from_real_value (do_floor
32395 ? dconst1 : dconstm1, mode));
32397 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32398 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32399 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32400 gen_rtx_AND (mode, one, tmp)));
32401 /* We always need to subtract here to preserve signed zero. */
32402 tmp = expand_simple_binop (mode, MINUS,
32403 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32404 emit_move_insn (res, tmp);
32406 emit_label (label);
32407 LABEL_NUSES (label) = 1;
32409 emit_move_insn (operand0, res);
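/* Editor's sketch, not part of GCC: the branch-free compensation used
   above, with a 0/1 flag standing in for the all-ones compare mask that
   gets ANDed with 1.0.  Floor case shown; ceil uses -1.0 instead.  */
#if 0
#include <math.h>

static double
floor_df32_sketch (double x)
{
  const double two52 = 4503599627370496.0;  /* 2**52 */
  double xa = fabs (x), x2;
  if (!(xa < two52))
    return x;
  xa = xa + two52 - two52;                  /* nearest integer */
  x2 = copysign (xa, x);
  /* Always subtract; the operand is 0.0 when no fixup is needed, which
     keeps a -0.0 input intact.  */
  x2 = x2 - (x2 > x ? 1.0 : 0.0);
  return x2;
}
#endif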
32412 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0.  */
32415 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32417 /* C code for the stuff we expand below.
32418 double xa = fabs (x), x2;
32419 if (!isless (xa, TWO52))
32421 x2 = (double)(long)x;
32428 if (HONOR_SIGNED_ZEROS (mode))
32429 return copysign (x2, x);
32432 enum machine_mode mode = GET_MODE (operand0);
32433 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32435 TWO52 = ix86_gen_TWO52 (mode);
32437 /* Temporary for holding the result, initialized to the input
32438 operand to ease control flow. */
32439 res = gen_reg_rtx (mode);
32440 emit_move_insn (res, operand1);
32442 /* xa = abs (operand1) */
32443 xa = ix86_expand_sse_fabs (res, &mask);
32445 /* if (!isless (xa, TWO52)) goto label; */
32446 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32448 /* xa = (double)(long)x */
32449 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32450 expand_fix (xi, res, 0);
32451 expand_float (xa, xi, 0);
32454 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32456 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32457 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32458 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32459 gen_rtx_AND (mode, one, tmp)));
32460 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32461 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32462 emit_move_insn (res, tmp);
32464 if (HONOR_SIGNED_ZEROS (mode))
32465 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32467 emit_label (label);
32468 LABEL_NUSES (label) = 1;
32470 emit_move_insn (operand0, res);
32473 /* Expand SSE sequence for computing round from OPERAND1 storing
32474 into OPERAND0. Sequence that works without relying on DImode truncation
32475 via cvttsd2siq that is only available on 64bit targets. */
32477 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32479 /* C code for the stuff we expand below.
32480 double xa = fabs (x), xa2, x2;
32481 if (!isless (xa, TWO52))
32483 Using the absolute value and copying back sign makes
32484 -0.0 -> -0.0 correct.
32485 xa2 = xa + TWO52 - TWO52;
32490 else if (dxa > 0.5)
32492 x2 = copysign (xa2, x);
32495 enum machine_mode mode = GET_MODE (operand0);
32496 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32498 TWO52 = ix86_gen_TWO52 (mode);
32500 /* Temporary for holding the result, initialized to the input
32501 operand to ease control flow. */
32502 res = gen_reg_rtx (mode);
32503 emit_move_insn (res, operand1);
32505 /* xa = abs (operand1) */
32506 xa = ix86_expand_sse_fabs (res, &mask);
32508 /* if (!isless (xa, TWO52)) goto label; */
32509 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32511 /* xa2 = xa + TWO52 - TWO52; */
32512 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32513 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32515 /* dxa = xa2 - xa; */
32516 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32518 /* generate 0.5, 1.0 and -0.5 */
32519 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32520 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32521 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32525 tmp = gen_reg_rtx (mode);
32526 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32527 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32528 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32529 gen_rtx_AND (mode, one, tmp)));
32530 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32531 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32532 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32533 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32534 gen_rtx_AND (mode, one, tmp)));
32535 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32537 /* res = copysign (xa2, operand1) */
32538 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32540 emit_label (label);
32541 LABEL_NUSES (label) = 1;
32543 emit_move_insn (operand0, res);
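/* Editor's sketch, not part of GCC: the same sequence in plain C.  DXA
   measures how far the ties-to-even result XA2 drifted from |x|, and the
   two fixups below mirror the UNGT and UNGE masks emitted above.  */
#if 0
#include <math.h>

static double
round_df32_sketch (double x)
{
  const double two52 = 4503599627370496.0;  /* 2**52 */
  double xa = fabs (x), xa2, dxa;
  if (!(xa < two52))
    return x;
  xa2 = xa + two52 - two52;   /* nearest integer, ties to even */
  dxa = xa2 - xa;
  if (dxa > 0.5)              /* first compensation (UNGT dxa, half) */
    xa2 -= 1.0;
  if (dxa <= -0.5)            /* second compensation (UNGE mhalf, dxa) */
    xa2 += 1.0;
  return copysign (xa2, x);
}
#endif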
32546 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0.  */
32549 ix86_expand_trunc (rtx operand0, rtx operand1)
32551 /* C code for SSE variant we expand below.
32552 double xa = fabs (x), x2;
32553 if (!isless (xa, TWO52))
32555 x2 = (double)(long)x;
32556 if (HONOR_SIGNED_ZEROS (mode))
32557 return copysign (x2, x);
32560 enum machine_mode mode = GET_MODE (operand0);
32561 rtx xa, xi, TWO52, label, res, mask;
32563 TWO52 = ix86_gen_TWO52 (mode);
32565 /* Temporary for holding the result, initialized to the input
32566 operand to ease control flow. */
32567 res = gen_reg_rtx (mode);
32568 emit_move_insn (res, operand1);
32570 /* xa = abs (operand1) */
32571 xa = ix86_expand_sse_fabs (res, &mask);
32573 /* if (!isless (xa, TWO52)) goto label; */
32574 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32576 /* x = (double)(long)x */
32577 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32578 expand_fix (xi, res, 0);
32579 expand_float (res, xi, 0);
32581 if (HONOR_SIGNED_ZEROS (mode))
32582 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32584 emit_label (label);
32585 LABEL_NUSES (label) = 1;
32587 emit_move_insn (operand0, res);
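/* Editor's sketch, not part of GCC: the cast pair above, assuming a
   64-bit long for the DFmode case.  The TWO52 guard guarantees |x| fits
   in the integer mode, and the conversion truncates toward zero, which
   is exactly trunc.  */
#if 0
static double
trunc_sketch (double x)
{
  /* Valid only for |x| < 2**52, as ensured by the guard above.  */
  return (double) (long) x;
}
#endif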
32590 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0.  Sequence that works without relying on DImode truncation via cvttsd2siq that is only available on 64bit targets.  */
32593 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32595 enum machine_mode mode = GET_MODE (operand0);
32596 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32598 /* C code for SSE variant we expand below.
32599 double xa = fabs (x), x2;
32600 if (!isless (xa, TWO52))
32602 xa2 = xa + TWO52 - TWO52;
32606 x2 = copysign (xa2, x);
32610 TWO52 = ix86_gen_TWO52 (mode);
32612 /* Temporary for holding the result, initialized to the input
32613 operand to ease control flow. */
32614 res = gen_reg_rtx (mode);
32615 emit_move_insn (res, operand1);
32617 /* xa = abs (operand1) */
32618 xa = ix86_expand_sse_fabs (res, &smask);
32620 /* if (!isless (xa, TWO52)) goto label; */
32621 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32623 /* res = xa + TWO52 - TWO52; */
32624 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32625 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32626 emit_move_insn (res, tmp);
32629 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32631 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32632 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32633 emit_insn (gen_rtx_SET (VOIDmode, mask,
32634 gen_rtx_AND (mode, mask, one)));
32635 tmp = expand_simple_binop (mode, MINUS,
32636 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32637 emit_move_insn (res, tmp);
32639 /* res = copysign (res, operand1) */
32640 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32642 emit_label (label);
32643 LABEL_NUSES (label) = 1;
32645 emit_move_insn (operand0, res);
32648 /* Expand SSE sequence for computing round from OPERAND1 storing into OPERAND0.  */
32651 ix86_expand_round (rtx operand0, rtx operand1)
32653 /* C code for the stuff we're doing below:
32654 double xa = fabs (x);
32655 if (!isless (xa, TWO52))
32657 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32658 return copysign (xa, x);
32660 enum machine_mode mode = GET_MODE (operand0);
32661 rtx res, TWO52, xa, label, xi, half, mask;
32662 const struct real_format *fmt;
32663 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32665 /* Temporary for holding the result, initialized to the input
32666 operand to ease control flow. */
32667 res = gen_reg_rtx (mode);
32668 emit_move_insn (res, operand1);
32670 TWO52 = ix86_gen_TWO52 (mode);
32671 xa = ix86_expand_sse_fabs (res, &mask);
32672 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32674 /* load nextafter (0.5, 0.0) */
32675 fmt = REAL_MODE_FORMAT (mode);
32676 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32677 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32679 /* xa = xa + 0.5 */
32680 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32681 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32683 /* xa = (double)(int64_t)xa */
32684 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32685 expand_fix (xi, xa, 0);
32686 expand_float (xa, xi, 0);
32688 /* res = copysign (xa, operand1) */
32689 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32691 emit_label (label);
32692 LABEL_NUSES (label) = 1;
32694 emit_move_insn (operand0, res);
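/* Editor's sketch, not part of GCC: the sequence above in standalone C.
   Since XA is nonnegative here, the copysign on the 0.5 adjustment
   degenerates to a plain add of nextafter (0.5, 0.0).  */
#if 0
#include <math.h>

static double
round_sketch (double x)
{
  const double two52 = 4503599627370496.0;  /* 2**52 */
  double xa = fabs (x);
  if (!(xa < two52))
    return x;
  xa = xa + nextafter (0.5, 0.0);  /* largest double below 0.5 */
  xa = (double) (long) xa;         /* truncate */
  return copysign (xa, x);
}
#endif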
32697 /* Expand SSE sequence for computing round
32698 from OP1 storing into OP0 using sse4 round insn. */
32700 ix86_expand_round_sse4 (rtx op0, rtx op1)
32702 enum machine_mode mode = GET_MODE (op0);
32703 rtx e1, e2, e3, res, half, mask;
32704 const struct real_format *fmt;
32705 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32706 rtx (*gen_round) (rtx, rtx, rtx);
32711 gen_round = gen_sse4_1_roundsf2;
32714 gen_round = gen_sse4_1_rounddf2;
32717 gcc_unreachable ();
32720 /* e1 = fabs(op1) */
32721 e1 = ix86_expand_sse_fabs (op1, &mask);
32723 /* load nextafter (0.5, 0.0) */
32724 fmt = REAL_MODE_FORMAT (mode);
32725 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32726 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32728 /* e2 = e1 + 0.5 */
32729 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32730 e2 = expand_simple_binop (mode, PLUS, e1, half, NULL_RTX, 0, OPTAB_DIRECT);
32732 /* e3 = trunc(e2) */
32733 e3 = gen_reg_rtx (mode);
32734 emit_insn (gen_round (e3, e2, GEN_INT (ROUND_TRUNC)));
32736 /* res = copysign (e3, op1) */
32737 res = gen_reg_rtx (mode);
32738 ix86_sse_copysign_to_positive (res, e3, op1, mask);
32740 emit_move_insn (op0, res);
32744 /* Table of valid machine attributes. */
32745 static const struct attribute_spec ix86_attribute_table[] =
32747 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
32748 affects_type_identity } */
32749 /* Stdcall attribute says callee is responsible for popping arguments
32750 if they are not variable. */
32751 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32753 /* Fastcall attribute says callee is responsible for popping arguments
32754 if they are not variable. */
32755 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32757 /* Thiscall attribute says callee is responsible for popping arguments
32758 if they are not variable. */
32759 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32761 /* Cdecl attribute says the callee is a normal C declaration */
32762 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32764 /* Regparm attribute specifies how many integer arguments are to be
32765 passed in registers. */
32766 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
32768 /* Sseregparm attribute says we are using x86_64 calling conventions
32769 for FP arguments. */
32770 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32772 /* force_align_arg_pointer says this function realigns the stack at entry. */
32773 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32774 false, true, true, ix86_handle_cconv_attribute, false },
32775 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32776 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
32777 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
32778 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
32781 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32783 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32785 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32786 SUBTARGET_ATTRIBUTE_TABLE,
32788 /* ms_abi and sysv_abi calling convention function attributes. */
32789 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32790 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32791 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
32793 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32794 ix86_handle_callee_pop_aggregate_return, true },
32796 { NULL, 0, 0, false, false, false, NULL, false }
32799 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32801 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32802 tree vectype ATTRIBUTE_UNUSED,
32803 int misalign ATTRIBUTE_UNUSED)
32805 switch (type_of_cost)
32808 return ix86_cost->scalar_stmt_cost;
32811 return ix86_cost->scalar_load_cost;
32814 return ix86_cost->scalar_store_cost;
32817 return ix86_cost->vec_stmt_cost;
32820 return ix86_cost->vec_align_load_cost;
32823 return ix86_cost->vec_store_cost;
32825 case vec_to_scalar:
32826 return ix86_cost->vec_to_scalar_cost;
32828 case scalar_to_vec:
32829 return ix86_cost->scalar_to_vec_cost;
32831 case unaligned_load:
32832 case unaligned_store:
32833 return ix86_cost->vec_unalign_load_cost;
32835 case cond_branch_taken:
32836 return ix86_cost->cond_taken_branch_cost;
32838 case cond_branch_not_taken:
32839 return ix86_cost->cond_not_taken_branch_cost;
32845 gcc_unreachable ();
32850 /* Implement targetm.vectorize.builtin_vec_perm. */
32853 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32855 tree itype = TREE_TYPE (vec_type);
32856 bool u = TYPE_UNSIGNED (itype);
32857 enum machine_mode vmode = TYPE_MODE (vec_type);
32858 enum ix86_builtins fcode;
32859 bool ok = TARGET_SSE2;
32865 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32868 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32870 itype = ix86_get_builtin_type (IX86_BT_DI);
32875 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32879 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32881 itype = ix86_get_builtin_type (IX86_BT_SI);
32885 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32888 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32891 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32894 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32904 *mask_type = itype;
32905 return ix86_builtins[(int) fcode];
32908 /* Return a vector mode with twice as many elements as VMODE. */
32909 /* ??? Consider moving this to a table generated by genmodes.c. */
32911 static enum machine_mode
32912 doublesize_vector_mode (enum machine_mode vmode)
32916 case V2SFmode: return V4SFmode;
32917 case V1DImode: return V2DImode;
32918 case V2SImode: return V4SImode;
32919 case V4HImode: return V8HImode;
32920 case V8QImode: return V16QImode;
32922 case V2DFmode: return V4DFmode;
32923 case V4SFmode: return V8SFmode;
32924 case V2DImode: return V4DImode;
32925 case V4SImode: return V8SImode;
32926 case V8HImode: return V16HImode;
32927 case V16QImode: return V32QImode;
32929 case V4DFmode: return V8DFmode;
32930 case V8SFmode: return V16SFmode;
32931 case V4DImode: return V8DImode;
32932 case V8SImode: return V16SImode;
32933 case V16HImode: return V32HImode;
32934 case V32QImode: return V64QImode;
32937 gcc_unreachable ();
32941 /* Construct (set target (vec_select op0 (parallel perm))) and
32942 return true if that's a valid instruction in the active ISA. */
32945 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32947 rtx rperm[MAX_VECT_LEN], x;
32950 for (i = 0; i < nelt; ++i)
32951 rperm[i] = GEN_INT (perm[i]);
32953 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32954 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32955 x = gen_rtx_SET (VOIDmode, target, x);
32958 if (recog_memoized (x) < 0)
32966 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32969 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32970 const unsigned char *perm, unsigned nelt)
32972 enum machine_mode v2mode;
32975 v2mode = doublesize_vector_mode (GET_MODE (op0));
32976 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32977 return expand_vselect (target, x, perm, nelt);
32980 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32981 in terms of blendp[sd] / pblendw / pblendvb. */
32984 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32986 enum machine_mode vmode = d->vmode;
32987 unsigned i, mask, nelt = d->nelt;
32988 rtx target, op0, op1, x;
32990 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32992 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32995 /* This is a blend, not a permute. Elements must stay in their
32996 respective lanes. */
32997 for (i = 0; i < nelt; ++i)
32999 unsigned e = d->perm[i];
33000 if (!(e == i || e == i + nelt))
33007 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
33008 decision should be extracted elsewhere, so that we only try that
33009 sequence once all budget==3 options have been tried. */
33011 /* For bytes, see if bytes move in pairs so we can use pblendw with
33012 an immediate argument, rather than pblendvb with a vector argument. */
33013 if (vmode == V16QImode)
33015 bool pblendw_ok = true;
33016 for (i = 0; i < 16 && pblendw_ok; i += 2)
33017 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
33021 rtx rperm[16], vperm;
33023 for (i = 0; i < nelt; ++i)
33024 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
33026 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33027 vperm = force_reg (V16QImode, vperm);
33029 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
33034 target = d->target;
33046 for (i = 0; i < nelt; ++i)
33047 mask |= (d->perm[i] >= nelt) << i;
33051 for (i = 0; i < 2; ++i)
33052 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
33056 for (i = 0; i < 4; ++i)
33057 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
33061 for (i = 0; i < 8; ++i)
33062 mask |= (d->perm[i * 2] >= 16) << i;
33066 target = gen_lowpart (vmode, target);
33067 op0 = gen_lowpart (vmode, op0);
33068 op1 = gen_lowpart (vmode, op1);
33072 gcc_unreachable ();
33075 /* This matches five different patterns with the different modes. */
33076 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
33077 x = gen_rtx_SET (VOIDmode, target, x);
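/* Editor's sketch, not part of GCC: deriving a blend immediate from a
   permutation, as in the element-sized cases above.  Bit I of the mask
   selects the second operand whenever PERM[I] indexes into it.  */
#if 0
static unsigned int
blend_immediate (const unsigned char *perm, unsigned int nelt)
{
  unsigned int i, mask = 0;
  for (i = 0; i < nelt; ++i)
    mask |= (perm[i] >= nelt) << i;
  return mask;
}
#endif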
33083 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33084 in terms of the variable form of vpermilps.
33086 Note that we will have already failed the immediate input vpermilps,
33087 which requires that the high and low part shuffle be identical; the
33088 variable form doesn't require that. */
33091 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
33093 rtx rperm[8], vperm;
33096 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
33099 /* We can only permute within the 128-bit lane. */
33100 for (i = 0; i < 8; ++i)
33102 unsigned e = d->perm[i];
33103 if (i < 4 ? e >= 4 : e < 4)
33110 for (i = 0; i < 8; ++i)
33112 unsigned e = d->perm[i];
33114 /* Within each 128-bit lane, the elements of op0 are numbered
33115 from 0 and the elements of op1 are numbered from 4. */
33121 rperm[i] = GEN_INT (e);
33124 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
33125 vperm = force_reg (V8SImode, vperm);
33126 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
33131 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33132 in terms of pshufb or vpperm. */
33135 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
33137 unsigned i, nelt, eltsz;
33138 rtx rperm[16], vperm, target, op0, op1;
33140 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
33142 if (GET_MODE_SIZE (d->vmode) != 16)
33149 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33151 for (i = 0; i < nelt; ++i)
33153 unsigned j, e = d->perm[i];
33154 for (j = 0; j < eltsz; ++j)
33155 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
33158 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33159 vperm = force_reg (V16QImode, vperm);
33161 target = gen_lowpart (V16QImode, d->target);
33162 op0 = gen_lowpart (V16QImode, d->op0);
33163 if (d->op0 == d->op1)
33164 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
33167 op1 = gen_lowpart (V16QImode, d->op1);
33168 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
33174 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
33175 in a single instruction. */
33178 expand_vec_perm_1 (struct expand_vec_perm_d *d)
33180 unsigned i, nelt = d->nelt;
33181 unsigned char perm2[MAX_VECT_LEN];
33183 /* Check plain VEC_SELECT first, because AVX has instructions that could
33184 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
33185 input where SEL+CONCAT may not. */
33186 if (d->op0 == d->op1)
33188 int mask = nelt - 1;
33190 for (i = 0; i < nelt; i++)
33191 perm2[i] = d->perm[i] & mask;
33193 if (expand_vselect (d->target, d->op0, perm2, nelt))
33196 /* There are plenty of patterns in sse.md that are written for
33197 SEL+CONCAT and are not replicated for a single op. Perhaps
33198 that should be changed, to avoid the nastiness here. */
33200 /* Recognize interleave style patterns, which means incrementing
33201 every other permutation operand. */
33202 for (i = 0; i < nelt; i += 2)
33204 perm2[i] = d->perm[i] & mask;
33205 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
33207 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33210 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
33213 for (i = 0; i < nelt; i += 4)
33215 perm2[i + 0] = d->perm[i + 0] & mask;
33216 perm2[i + 1] = d->perm[i + 1] & mask;
33217 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
33218 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
33221 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33226 /* Finally, try the fully general two operand permute. */
33227 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
33230 /* Recognize interleave style patterns with reversed operands. */
33231 if (d->op0 != d->op1)
33233 for (i = 0; i < nelt; ++i)
33235 unsigned e = d->perm[i];
33243 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
33247 /* Try the SSE4.1 blend variable merge instructions. */
33248 if (expand_vec_perm_blend (d))
33251 /* Try one of the AVX vpermil variable permutations. */
33252 if (expand_vec_perm_vpermil (d))
33255 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
33256 if (expand_vec_perm_pshufb (d))
33262 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33263 in terms of a pair of pshuflw + pshufhw instructions. */
33266 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
33268 unsigned char perm2[MAX_VECT_LEN];
33272 if (d->vmode != V8HImode || d->op0 != d->op1)
33275 /* The two permutations only operate in 64-bit lanes. */
33276 for (i = 0; i < 4; ++i)
33277 if (d->perm[i] >= 4)
33279 for (i = 4; i < 8; ++i)
33280 if (d->perm[i] < 4)
33286 /* Emit the pshuflw. */
33287 memcpy (perm2, d->perm, 4);
33288 for (i = 4; i < 8; ++i)
33290 ok = expand_vselect (d->target, d->op0, perm2, 8);
33293 /* Emit the pshufhw. */
33294 memcpy (perm2 + 4, d->perm + 4, 4);
33295 for (i = 0; i < 4; ++i)
33297 ok = expand_vselect (d->target, d->target, perm2, 8);
33303 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33304 the permutation using the SSSE3 palignr instruction. This succeeds
33305 when all of the elements in PERM fit within one vector and we merely
33306 need to shift them down so that a single vector permutation has a
33307 chance to succeed. */
33310 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33312 unsigned i, nelt = d->nelt;
33317 /* Even with AVX, palignr only operates on 128-bit vectors. */
33318 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33321 min = nelt, max = 0;
33322 for (i = 0; i < nelt; ++i)
33324 unsigned e = d->perm[i];
33330 if (min == 0 || max - min >= nelt)
33333 /* Given that we have SSSE3, we know we'll be able to implement the
33334 single operand permutation after the palignr with pshufb. */
33338 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33339 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33340 gen_lowpart (TImode, d->op1),
33341 gen_lowpart (TImode, d->op0), shift));
33343 d->op0 = d->op1 = d->target;
33346 for (i = 0; i < nelt; ++i)
33348 unsigned e = d->perm[i] - min;
33354 /* Test for the degenerate case where the alignment by itself
33355 produces the desired permutation. */
33359 ok = expand_vec_perm_1 (d);
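/* Editor's sketch, not part of GCC: byte-level semantics of the palignr
   step above.  The inputs are concatenated with OP1 in the high half and
   OP0 in the low half, then shifted down SHIFT bytes (SHIFT <= 16), after
   which a single-input shuffle can finish the job.  */
#if 0
static void
palignr_bytes (unsigned char res[16], const unsigned char op0[16],
	       const unsigned char op1[16], unsigned int shift)
{
  unsigned char cat[32];
  unsigned int i;
  for (i = 0; i < 16; ++i)
    {
      cat[i] = op0[i];
      cat[i + 16] = op1[i];
    }
  for (i = 0; i < 16; ++i)
    res[i] = cat[i + shift];
}
#endif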
33365 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33366 a two vector permutation into a single vector permutation by using
33367 an interleave operation to merge the vectors. */
33370 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33372 struct expand_vec_perm_d dremap, dfinal;
33373 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33374 unsigned contents, h1, h2, h3, h4;
33375 unsigned char remap[2 * MAX_VECT_LEN];
33379 if (d->op0 == d->op1)
33382 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33383 lanes. We can use similar techniques with the vperm2f128 instruction,
33384 but it requires slightly different logic. */
33385 if (GET_MODE_SIZE (d->vmode) != 16)
33388 /* Examine from whence the elements come. */
33390 for (i = 0; i < nelt; ++i)
33391 contents |= 1u << d->perm[i];
33393 /* Split the two input vectors into 4 halves. */
33394 h1 = (1u << nelt2) - 1;
33399 memset (remap, 0xff, sizeof (remap));
33402 /* If the elements come from the low halves, use interleave low; similarly
33403 for interleave high.  If the elements are from mismatched halves, we
33404 can use shufps for V4SF/V4SI or do a DImode shuffle.  */
33405 if ((contents & (h1 | h3)) == contents)
33407 for (i = 0; i < nelt2; ++i)
33410 remap[i + nelt] = i * 2 + 1;
33411 dremap.perm[i * 2] = i;
33412 dremap.perm[i * 2 + 1] = i + nelt;
33415 else if ((contents & (h2 | h4)) == contents)
33417 for (i = 0; i < nelt2; ++i)
33419 remap[i + nelt2] = i * 2;
33420 remap[i + nelt + nelt2] = i * 2 + 1;
33421 dremap.perm[i * 2] = i + nelt2;
33422 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33425 else if ((contents & (h1 | h4)) == contents)
33427 for (i = 0; i < nelt2; ++i)
33430 remap[i + nelt + nelt2] = i + nelt2;
33431 dremap.perm[i] = i;
33432 dremap.perm[i + nelt2] = i + nelt + nelt2;
33436 dremap.vmode = V2DImode;
33438 dremap.perm[0] = 0;
33439 dremap.perm[1] = 3;
33442 else if ((contents & (h2 | h3)) == contents)
33444 for (i = 0; i < nelt2; ++i)
33446 remap[i + nelt2] = i;
33447 remap[i + nelt] = i + nelt2;
33448 dremap.perm[i] = i + nelt2;
33449 dremap.perm[i + nelt2] = i + nelt;
33453 dremap.vmode = V2DImode;
33455 dremap.perm[0] = 1;
33456 dremap.perm[1] = 2;
33462 /* Use the remapping array set up above to move the elements from their
33463 swizzled locations into their final destinations. */
33465 for (i = 0; i < nelt; ++i)
33467 unsigned e = remap[d->perm[i]];
33468 gcc_assert (e < nelt);
33469 dfinal.perm[i] = e;
33471 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33472 dfinal.op1 = dfinal.op0;
33473 dremap.target = dfinal.op0;
33475 /* Test if the final remap can be done with a single insn. For V4SFmode or
33476 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33478 ok = expand_vec_perm_1 (&dfinal);
33479 seq = get_insns ();
33485 if (dremap.vmode != dfinal.vmode)
33487 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33488 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33489 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33492 ok = expand_vec_perm_1 (&dremap);
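/* Editor's sketch, not part of GCC: the CONTENTS classification above as
   a standalone helper.  One bit per source element; comparing the result
   against the four half-masks H1..H4 tells which interleave applies.  */
#if 0
static unsigned int
perm_contents (const unsigned char *perm, unsigned int nelt)
{
  unsigned int i, contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << perm[i];
  return contents;
}
#endif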
33499 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33500 permutation with two pshufb insns and an ior. We should have already
33501 failed all two instruction sequences. */
33504 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33506 rtx rperm[2][16], vperm, l, h, op, m128;
33507 unsigned int i, nelt, eltsz;
33509 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33511 gcc_assert (d->op0 != d->op1);
33514 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33516 /* Generate two permutation masks. If the required element is within
33517 the given vector it is shuffled into the proper lane. If the required
33518 element is in the other vector, force a zero into the lane by setting
33519 bit 7 in the permutation mask. */
33520 m128 = GEN_INT (-128);
33521 for (i = 0; i < nelt; ++i)
33523 unsigned j, e = d->perm[i];
33524 unsigned which = (e >= nelt);
33528 for (j = 0; j < eltsz; ++j)
33530 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33531 rperm[1-which][i*eltsz + j] = m128;
33535 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33536 vperm = force_reg (V16QImode, vperm);
33538 l = gen_reg_rtx (V16QImode);
33539 op = gen_lowpart (V16QImode, d->op0);
33540 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33542 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33543 vperm = force_reg (V16QImode, vperm);
33545 h = gen_reg_rtx (V16QImode);
33546 op = gen_lowpart (V16QImode, d->op1);
33547 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33549 op = gen_lowpart (V16QImode, d->target);
33550 emit_insn (gen_iorv16qi3 (op, l, h));
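/* Editor's sketch, not part of GCC: the pshufb zeroing rule the two-mask
   sequence above relies on.  A selector byte with bit 7 set forces the
   lane to zero, so one shuffle per source plus an OR merges two inputs.  */
#if 0
static void
pshufb_bytes (unsigned char res[16], const unsigned char src[16],
	      const unsigned char sel[16])
{
  unsigned int i;
  for (i = 0; i < 16; ++i)
    res[i] = (sel[i] & 0x80) ? 0 : src[sel[i] & 0x0f];
}
#endif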
33555 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33556 and extract-odd permutations. */
33559 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33566 t1 = gen_reg_rtx (V4DFmode);
33567 t2 = gen_reg_rtx (V4DFmode);
33569 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33570 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33571 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33573 /* Now an unpck[lh]pd will produce the result required. */
33575 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33577 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33583 int mask = odd ? 0xdd : 0x88;
33585 t1 = gen_reg_rtx (V8SFmode);
33586 t2 = gen_reg_rtx (V8SFmode);
33587 t3 = gen_reg_rtx (V8SFmode);
33589 /* Shuffle within the 128-bit lanes to produce:
33590 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33591 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33594 /* Shuffle the lanes around to produce:
33595 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33596 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33599 /* Shuffle within the 128-bit lanes to produce:
33600 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33601 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33603 /* Shuffle within the 128-bit lanes to produce:
33604 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33605 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33607 /* Shuffle the lanes around to produce:
33608 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33609 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33618 /* These are always directly implementable by expand_vec_perm_1. */
33619 gcc_unreachable ();
33623 return expand_vec_perm_pshufb2 (d);
33626 /* We need 2*log2(N)-1 operations to achieve odd/even
33627 with interleave. */
33628 t1 = gen_reg_rtx (V8HImode);
33629 t2 = gen_reg_rtx (V8HImode);
33630 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33631 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33632 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33633 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33635 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33637 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33644 return expand_vec_perm_pshufb2 (d);
33647 t1 = gen_reg_rtx (V16QImode);
33648 t2 = gen_reg_rtx (V16QImode);
33649 t3 = gen_reg_rtx (V16QImode);
33650 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33651 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33652 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33653 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33654 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33655 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33657 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33659 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33665 gcc_unreachable ();
33671 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33672 extract-even and extract-odd permutations. */
33675 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33677 unsigned i, odd, nelt = d->nelt;
33680 if (odd != 0 && odd != 1)
33683 for (i = 1; i < nelt; ++i)
33684 if (d->perm[i] != 2 * i + odd)
33687 return expand_vec_perm_even_odd_1 (d, odd);
33690 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33691 permutations. We assume that expand_vec_perm_1 has already failed. */
33694 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33696 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33697 enum machine_mode vmode = d->vmode;
33698 unsigned char perm2[4];
33706 /* These are special-cased in sse.md so that we can optionally
33707 use the vbroadcast instruction. They expand to two insns
33708 if the input happens to be in a register. */
33709 gcc_unreachable ();
33715 /* These are always implementable using standard shuffle patterns. */
33716 gcc_unreachable ();
33720 /* These can be implemented via interleave. We save one insn by
33721 stopping once we have promoted to V4SImode and then use pshufd. */
33724 optab otab = vec_interleave_low_optab;
33728 otab = vec_interleave_high_optab;
33733 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33734 vmode = get_mode_wider_vector (vmode);
33735 op0 = gen_lowpart (vmode, op0);
33737 while (vmode != V4SImode);
33739 memset (perm2, elt, 4);
33740 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33745 gcc_unreachable ();
33749 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33750 broadcast permutations. */
33753 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33755 unsigned i, elt, nelt = d->nelt;
33757 if (d->op0 != d->op1)
33761 for (i = 1; i < nelt; ++i)
33762 if (d->perm[i] != elt)
33765 return expand_vec_perm_broadcast_1 (d);
33768 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33769 With all of the interface bits taken care of, perform the expansion
33770 in D and return true on success. */
33773 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33775 /* Try a single instruction expansion. */
33776 if (expand_vec_perm_1 (d))
33779 /* Try sequences of two instructions. */
33781 if (expand_vec_perm_pshuflw_pshufhw (d))
33784 if (expand_vec_perm_palignr (d))
33787 if (expand_vec_perm_interleave2 (d))
33790 if (expand_vec_perm_broadcast (d))
33793 /* Try sequences of three instructions. */
33795 if (expand_vec_perm_pshufb2 (d))
33798 /* ??? Look for narrow permutations whose element orderings would
33799 allow the promotion to a wider mode. */
33801 /* ??? Look for sequences of interleave or a wider permute that place
33802 the data into the correct lanes for a half-vector shuffle like
33803 pshuf[lh]w or vpermilps. */
33805 /* ??? Look for sequences of interleave that produce the desired results.
33806 The combinatorics of punpck[lh] get pretty ugly... */
33808 if (expand_vec_perm_even_odd (d))
33814 /* Extract the values from the vector CST into the permutation array in D.
33815 Return 0 on error, 1 if all values from the permutation come from the
33816 first vector, 2 if all values from the second vector, and 3 otherwise. */
33819 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33821 tree list = TREE_VECTOR_CST_ELTS (cst);
33822 unsigned i, nelt = d->nelt;
33825 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33827 unsigned HOST_WIDE_INT e;
33829 if (!host_integerp (TREE_VALUE (list), 1))
33831 e = tree_low_cst (TREE_VALUE (list), 1);
33835 ret |= (e < nelt ? 1 : 2);
33838 gcc_assert (list == NULL);
33840 /* If all elements come from the second vector, fold them into the first.  */
33842 for (i = 0; i < nelt; ++i)
33843 d->perm[i] -= nelt;
33849 ix86_expand_vec_perm_builtin (tree exp)
33851 struct expand_vec_perm_d d;
33852 tree arg0, arg1, arg2;
33854 arg0 = CALL_EXPR_ARG (exp, 0);
33855 arg1 = CALL_EXPR_ARG (exp, 1);
33856 arg2 = CALL_EXPR_ARG (exp, 2);
33858 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33859 d.nelt = GET_MODE_NUNITS (d.vmode);
33860 d.testing_p = false;
33861 gcc_assert (VECTOR_MODE_P (d.vmode));
33863 if (TREE_CODE (arg2) != VECTOR_CST)
33865 error_at (EXPR_LOCATION (exp),
33866 "vector permutation requires vector constant");
33870 switch (extract_vec_perm_cst (&d, arg2))
33876 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33880 if (!operand_equal_p (arg0, arg1, 0))
33882 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33883 d.op0 = force_reg (d.vmode, d.op0);
33884 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33885 d.op1 = force_reg (d.vmode, d.op1);
33889 /* The elements of PERM do not suggest that only the first operand
33890 is used, but both operands are identical. Allow easier matching
33891 of the permutation by folding the permutation into the single operand itself.  */
33894 unsigned i, nelt = d.nelt;
33895 for (i = 0; i < nelt; ++i)
33896 if (d.perm[i] >= nelt)
33902 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33903 d.op0 = force_reg (d.vmode, d.op0);
33908 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33909 d.op0 = force_reg (d.vmode, d.op0);
33914 d.target = gen_reg_rtx (d.vmode);
33915 if (ix86_expand_vec_perm_builtin_1 (&d))
33918 /* For compiler generated permutations, we should never get here, because
33919 the compiler should also be checking the ok hook.  But since this is a
33920 builtin the user has access to, don't abort; issue a sorry instead.  */
33924 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33927 sorry ("vector permutation (%d %d %d %d)",
33928 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33931 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33932 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33933 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33936 sorry ("vector permutation "
33937 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33938 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33939 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33940 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33941 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33944 gcc_unreachable ();
33947 return CONST0_RTX (d.vmode);
33950 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33953 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33955 struct expand_vec_perm_d d;
33959 d.vmode = TYPE_MODE (vec_type);
33960 d.nelt = GET_MODE_NUNITS (d.vmode);
33961 d.testing_p = true;
33963 /* Given sufficient ISA support we can just return true here
33964 for selected vector modes. */
33965 if (GET_MODE_SIZE (d.vmode) == 16)
33967 /* All implementable with a single vpperm insn. */
33970 /* All implementable with 2 pshufb + 1 ior. */
33973 /* All implementable with shufpd or unpck[lh]pd. */
33978 vec_mask = extract_vec_perm_cst (&d, mask);
33980 /* This hook cannot be called in response to something that the
33981 user does (unlike the builtin expander) so we shouldn't ever see
33982 an error generated from the extract. */
33983 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33984 one_vec = (vec_mask != 3);
33986 /* Implementable with shufps or pshufd. */
33987 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33990 /* Otherwise we have to go through the motions and see if we can
33991 figure out how to generate the requested permutation. */
33992 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33993 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33995 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33998 ret = ix86_expand_vec_perm_builtin_1 (&d);
34005 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
34007 struct expand_vec_perm_d d;
34013 d.vmode = GET_MODE (targ);
34014 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
34015 d.testing_p = false;
34017 for (i = 0; i < nelt; ++i)
34018 d.perm[i] = i * 2 + odd;
34020 /* We'll either be able to implement the permutation directly... */
34021 if (expand_vec_perm_1 (&d))
34024 /* ... or we use the special-case patterns. */
34025 expand_vec_perm_even_odd_1 (&d, odd);
34028 /* Expand an insert into a vector register through pinsr insn.
34029 Return true if successful. */
34032 ix86_expand_pinsr (rtx *operands)
34034 rtx dst = operands[0];
34035 rtx src = operands[3];
34037 unsigned int size = INTVAL (operands[1]);
34038 unsigned int pos = INTVAL (operands[2]);
34040 if (GET_CODE (dst) == SUBREG)
34042 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
34043 dst = SUBREG_REG (dst);
34046 if (GET_CODE (src) == SUBREG)
34047 src = SUBREG_REG (src);
34049 switch (GET_MODE (dst))
34056 enum machine_mode srcmode, dstmode;
34057 rtx (*pinsr)(rtx, rtx, rtx, rtx);
34059 srcmode = mode_for_size (size, MODE_INT, 0);
34064 if (!TARGET_SSE4_1)
34066 dstmode = V16QImode;
34067 pinsr = gen_sse4_1_pinsrb;
34073 dstmode = V8HImode;
34074 pinsr = gen_sse2_pinsrw;
34078 if (!TARGET_SSE4_1)
34080 dstmode = V4SImode;
34081 pinsr = gen_sse4_1_pinsrd;
34085 gcc_assert (TARGET_64BIT);
34086 if (!TARGET_SSE4_1)
34088 dstmode = V2DImode;
34089 pinsr = gen_sse4_1_pinsrq;
34096 dst = gen_lowpart (dstmode, dst);
34097 src = gen_lowpart (srcmode, src);
34101 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
34110 /* This function returns the calling-ABI specific va_list type node,
34111 i.e. the va_list type appropriate for FNDECL.  */
34114 ix86_fn_abi_va_list (tree fndecl)
34117 return va_list_type_node;
34118 gcc_assert (fndecl != NULL_TREE);
34120 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
34121 return ms_va_list_type_node;
34123 return sysv_va_list_type_node;
34126 /* Returns the canonical va_list type specified by TYPE. If there
34127 is no valid TYPE provided, it returns NULL_TREE.  */
34130 ix86_canonical_va_list_type (tree type)
34134 /* Resolve references and pointers to va_list type. */
34135 if (TREE_CODE (type) == MEM_REF)
34136 type = TREE_TYPE (type);
34137 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
34138 type = TREE_TYPE (type);
34139 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
34140 type = TREE_TYPE (type);
34142 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
34144 wtype = va_list_type_node;
34145 gcc_assert (wtype != NULL_TREE);
34147 if (TREE_CODE (wtype) == ARRAY_TYPE)
34149 /* If va_list is an array type, the argument may have decayed
34150 to a pointer type, e.g. by being passed to another function.
34151 In that case, unwrap both types so that we can compare the
34152 underlying records. */
34153 if (TREE_CODE (htype) == ARRAY_TYPE
34154 || POINTER_TYPE_P (htype))
34156 wtype = TREE_TYPE (wtype);
34157 htype = TREE_TYPE (htype);
34160 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34161 return va_list_type_node;
34162 wtype = sysv_va_list_type_node;
34163 gcc_assert (wtype != NULL_TREE);
34165 if (TREE_CODE (wtype) == ARRAY_TYPE)
34167 /* If va_list is an array type, the argument may have decayed
34168 to a pointer type, e.g. by being passed to another function.
34169 In that case, unwrap both types so that we can compare the
34170 underlying records. */
34171 if (TREE_CODE (htype) == ARRAY_TYPE
34172 || POINTER_TYPE_P (htype))
34174 wtype = TREE_TYPE (wtype);
34175 htype = TREE_TYPE (htype);
34178 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34179 return sysv_va_list_type_node;
34180 wtype = ms_va_list_type_node;
34181 gcc_assert (wtype != NULL_TREE);
34183 if (TREE_CODE (wtype) == ARRAY_TYPE)
34185 /* If va_list is an array type, the argument may have decayed
34186 to a pointer type, e.g. by being passed to another function.
34187 In that case, unwrap both types so that we can compare the
34188 underlying records. */
34189 if (TREE_CODE (htype) == ARRAY_TYPE
34190 || POINTER_TYPE_P (htype))
34192 wtype = TREE_TYPE (wtype);
34193 htype = TREE_TYPE (htype);
34196 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34197 return ms_va_list_type_node;
34200 return std_canonical_va_list_type (type);
34203 /* Iterate through the target-specific builtin types for va_list.
34204 IDX denotes the iterator, *PTREE is set to the result type of
34205 the va_list builtin, and *PNAME to its internal type.
34206 Returns zero if there is no element for this index, otherwise
34207 IDX should be increased upon the next call.
34208 Note, do not iterate a base builtin's name like __builtin_va_list.
34209 Used from c_common_nodes_and_builtins. */
34212 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
34222 *ptree = ms_va_list_type_node;
34223 *pname = "__builtin_ms_va_list";
34227 *ptree = sysv_va_list_type_node;
34228 *pname = "__builtin_sysv_va_list";
34236 #undef TARGET_SCHED_DISPATCH
34237 #define TARGET_SCHED_DISPATCH has_dispatch
34238 #undef TARGET_SCHED_DISPATCH_DO
34239 #define TARGET_SCHED_DISPATCH_DO do_dispatch
34241 /* The size of the dispatch window is the total number of bytes of
34242 object code allowed in a window. */
34243 #define DISPATCH_WINDOW_SIZE 16
34245 /* Number of dispatch windows considered for scheduling. */
34246 #define MAX_DISPATCH_WINDOWS 3
34248 /* Maximum number of instructions in a window. */
34251 /* Maximum number of immediate operands in a window. */
34254 /* Maximum number of immediate bits allowed in a window. */
34255 #define MAX_IMM_SIZE 128
34257 /* Maximum number of 32 bit immediates allowed in a window. */
34258 #define MAX_IMM_32 4
34260 /* Maximum number of 64 bit immediates allowed in a window. */
34261 #define MAX_IMM_64 2
34263 /* Maximum total of loads or prefetches allowed in a window. */
34266 /* Maximum total of stores allowed in a window. */
34267 #define MAX_STORE 1
34273 /* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
34274 enum dispatch_group {
34289 /* Number of allowable groups in a dispatch window. It is an array
34290 indexed by dispatch_group enum. 100 is used as a big number,
34291 because the number of these kind of operations does not have any
34292 effect in dispatch window, but we need them for other reasons in
34294 static unsigned int num_allowable_groups[disp_last] = {
34295 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
34298 char group_name[disp_last + 1][16] = {
34299 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
34300 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
34301 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
34304 /* Instruction path. */
34307 path_single, /* Single micro op. */
34308 path_double, /* Double micro op. */
34309 path_multi, /* Instructions with more than 2 micro ops.  */
34313 /* sched_insn_info defines a window to the instructions scheduled in
34314 the basic block. It contains a pointer to the insn_info table and
34315 the instruction scheduled.
34317 Windows are allocated for each basic block and are linked with each other.  */
34319 typedef struct sched_insn_info_s {
34321 enum dispatch_group group;
34322 enum insn_path path;
34327 /* Linked list of dispatch windows.  This is a two-way list of
34328 dispatch windows of a basic block.  It contains information about
34329 the number of uops in the window and the total number of
34330 instructions and of bytes in the object code for this dispatch window.  */
34332 typedef struct dispatch_windows_s {
34333 int num_insn; /* Number of insn in the window. */
34334 int num_uops; /* Number of uops in the window. */
34335 int window_size; /* Number of bytes in the window. */
34336 int window_num; /* Window number, 0 or 1. */
34337 int num_imm; /* Number of immediates in an insn. */
34338 int num_imm_32; /* Number of 32 bit immediates in an insn. */
34339 int num_imm_64; /* Number of 64 bit immediates in an insn. */
34340 int imm_size; /* Total immediates in the window. */
34341 int num_loads; /* Total memory loads in the window. */
34342 int num_stores; /* Total memory stores in the window. */
34343 int violation; /* Violation exists in window. */
34344 sched_insn_info *window; /* Pointer to the window. */
34345 struct dispatch_windows_s *next;
34346 struct dispatch_windows_s *prev;
34347 } dispatch_windows;
34349 /* Immediate values used in an insn. */
34350 typedef struct imm_info_s
34357 static dispatch_windows *dispatch_window_list;
34358 static dispatch_windows *dispatch_window_list1;
34360 /* Get dispatch group of insn. */
34362 static enum dispatch_group
34363 get_mem_group (rtx insn)
34365 enum attr_memory memory;
34367 if (INSN_CODE (insn) < 0)
34368 return disp_no_group;
34369 memory = get_attr_memory (insn);
34370 if (memory == MEMORY_STORE)
34373 if (memory == MEMORY_LOAD)
34376 if (memory == MEMORY_BOTH)
34377 return disp_load_store;
34379 return disp_no_group;
34382 /* Return true if insn is a compare instruction. */
34387 enum attr_type type;
34389 type = get_attr_type (insn);
34390 return (type == TYPE_TEST
34391 || type == TYPE_ICMP
34392 || type == TYPE_FCMP
34393 || GET_CODE (PATTERN (insn)) == COMPARE);
34396 /* Return true if a dispatch violation was encountered. */
34399 dispatch_violation (void)
34401 if (dispatch_window_list->next)
34402 return dispatch_window_list->next->violation;
34403 return dispatch_window_list->violation;
34406 /* Return true if insn is a branch instruction. */
34409 is_branch (rtx insn)
34411 return (CALL_P (insn) || JUMP_P (insn));
34414 /* Return true if insn is a prefetch instruction. */
34417 is_prefetch (rtx insn)
34419 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
34422 /* This function initializes a dispatch window and the list container holding a
34423 pointer to the window. */
34426 init_window (int window_num)
34429 dispatch_windows *new_list;
34431 if (window_num == 0)
34432 new_list = dispatch_window_list;
34434 new_list = dispatch_window_list1;
34436 new_list->num_insn = 0;
34437 new_list->num_uops = 0;
34438 new_list->window_size = 0;
34439 new_list->next = NULL;
34440 new_list->prev = NULL;
34441 new_list->window_num = window_num;
34442 new_list->num_imm = 0;
34443 new_list->num_imm_32 = 0;
34444 new_list->num_imm_64 = 0;
34445 new_list->imm_size = 0;
34446 new_list->num_loads = 0;
34447 new_list->num_stores = 0;
34448 new_list->violation = false;
34450 for (i = 0; i < MAX_INSN; i++)
34452 new_list->window[i].insn = NULL;
34453 new_list->window[i].group = disp_no_group;
34454 new_list->window[i].path = no_path;
34455 new_list->window[i].byte_len = 0;
34456 new_list->window[i].imm_bytes = 0;
34461 /* This function allocates and initializes a dispatch window and the
34462 list container holding a pointer to the window. */
34464 static dispatch_windows *
34465 allocate_window (void)
34467 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34468 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34473 /* This routine initializes the dispatch scheduling information. It
34474 initiates building dispatch scheduler tables and constructs the
34475 first dispatch window. */
34478 init_dispatch_sched (void)
34480 /* Allocate a dispatch list and a window. */
34481 dispatch_window_list = allocate_window ();
34482 dispatch_window_list1 = allocate_window ();
34487 /* This function returns true if a branch is detected. End of a basic block
34488 does not have to be a branch, but here we assume only branches end a basic block.  */
34492 is_end_basic_block (enum dispatch_group group)
34494 return group == disp_branch;
34497 /* This function is called when the end of a window processing is reached. */
34500 process_end_window (void)
34502 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34503 if (dispatch_window_list->next)
34505 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34506 gcc_assert (dispatch_window_list->window_size
34507 + dispatch_window_list1->window_size <= 48);
34513 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
34514 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
34515 for 48 bytes of instructions.  Note that these windows are not dispatch
34516 windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
        init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
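
/* Illustrative flow: window 0 fills first; allocate_next_window (1)
   then links window 1 behind it through the next/prev pointers.  Once
   window 1 also fills, process_end_window resets both and dispatching
   restarts in window 0.  Callers flip between the two indices with
   "window_num = ~window_num & 1" (0 -> 1 -> 0 -> ...).  */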

/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;
    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;
    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;
    default:
      break;
    }
  return 0;
}

/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
                (rtx_function) find_constant_1, (void *) imm_values);
}

/* Return total size of immediate operands of an instruction along with
   number of corresponding immediate-operands.  It initializes its
   parameters to zero before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of
   64 bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
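
/* Worked example (illustrative): for an insn carrying one 32-bit and
   one 64-bit immediate, find_constant yields imm = 2, imm32 = 1 and
   imm64 = 1, so the size returned is 1 * 4 + 1 * 8 = 12 bytes.  */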

/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return false;
}

/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;
  if ((int) path == 1)
    return path_double;
  return path_multi;
}
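
/* Note: attribute value 0 is mapped to a single-uop decode path and
   value 1 to a double-uop path; any other value is treated as a
   multi-uop (vector) path.  */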

/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;
  if (is_branch (insn))
    return disp_branch;
  if (is_cmp (insn))
    return disp_cmp;
  if (has_immediate (insn))
    return disp_imm;
  if (is_prefetch (insn))
    return disp_prefetch;
  return disp_no_group;
}

/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand,
                                     &num_imm32_operand, &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
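
/* Note: BIG acts as an effectively infinite count here; returning it
   guarantees that the num_restrict > num_allowable_groups[group] test
   in fits_dispatch_window fails, rejecting the insn for the current
   window.  */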

/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in the Haifa scheduler to make sure they will
     be scheduled in the same dispatch window as the reference to
     them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for next window.  */
        return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }
  return true;
}
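
/* Note on the byte limits above: the two windows together hold at
   most 48 bytes, and a combined size of 32 bytes (or an insn that
   would push the total past 48) is treated as a full window pair.
   These constants mirror the checks in add_to_dispatch_window and the
   assertion in process_end_window.  */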

/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
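
/* Note: prefetches are accounted as loads above, consistent with
   count_num_restricted, which applies the MAX_LOAD limit to both
   disp_load and disp_prefetch.  */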

/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !insn_fits)
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block processing.  */
      process_end_window ();
      return;
    }
}
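
/* Example flow (illustrative): when a branch fills window 0 up to
   MAX_INSN insns, the insn is recorded and process_end_window runs at
   once, so the next insn starts with a fresh window 0.  A branch that
   does not fill the window still ends it, since branches are assumed
   to terminate a basic block (see is_end_basic_block).  */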

/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *) list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}

/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}

/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}

/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}

/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}

/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;
      case IS_DISPATCH_ON:
        return true;
      case IS_CMP:
        return is_cmp (insn);
      case DISPATCH_VIOLATION:
        return dispatch_violation ();
      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}
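
/* Usage sketch (illustrative): scheduler hooks elsewhere in this file
   are expected to query has_dispatch (insn, IS_DISPATCH_ON) to test
   whether dispatch scheduling is active for the current tuning, and
   to drive the window bookkeeping through do_dispatch (insn,
   ADD_TO_DISPATCH_WINDOW) as each insn is scheduled.  */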

/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return V16QImode;
    case HImode:
      return V8HImode;
    case SImode:
      return V4SImode;
    case DImode:
      return V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}

/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
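
/* Note: the return value is a bitmask of candidate vector sizes in
   bytes; 32 | 16 (= 48) offers both 256-bit and 128-bit vectors to
   the vectorizer, while 0 means only the preferred SIMD mode is
   tried.  */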

/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#ifdef TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#ifndef TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"