/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "diagnostic.h"

enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)

enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};

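/* Editor's note (not in the original source): vzeroupper insns emitted
   by the backend carry one of the call_avx256_state values above as the
   operand of their UNSPEC_VOLATILE, e.g.

     (unspec_volatile [(const_int -1)] UNSPECV_VZEROUPPER)

   for a call site whose callee returns a 256bit AVX register.  The
   scanner below recovers the value with INTVAL (XVECEXP (pat, 0, 0)).  */
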
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}

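/* Editor's sketch (not in the original source): check_avx256_stores
   follows the note_stores callback protocol from rtlanal.c, which calls
   it once per destination stored by a pattern.  A typical use, mirroring
   the scan below, is:

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);
     if (state == used)
       ... the insn wrote to a 256bit AVX register ...

   Only stores are walked; reads of AVX registers are not tracked by
   this helper.  */
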
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}

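/* Illustration (editor's note, not in the original source).  Within a
   block the scan above has two effects.  A vzeroupper whose upper
   128bits are known clear on entry is simply deleted; otherwise it is
   kept pending and re-anchored to just before the next jump or call:

     vzeroupper                     mov  %eax, %ebx
     mov  %eax, %ebx        ==>     vzeroupper
     call foo                       call foo

   The pending insn is instead deleted when the callee itself is passed
   256bit AVX arguments, since clearing the upper halves there would
   clobber them.  */
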
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
        continue;
      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
          break;
        case unused:
          break;
        case used:
          state = used;
          break;
        }
    }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}

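/* Worked example (editor's note, not in the original source): the merge
   over predecessors acts as a small lattice.  If block B has
   predecessors P1 in state `unused' and P2 in state `used', B is
   entered with `used'.  If P2 is instead `unknown', B is entered with
   `unknown' and stays unprocessed, unless UNKNOWN_IS_UNUSED is true, in
   which case the unknown edge is ignored and B is entered with
   `unused'.  Self loops (e->src == block) never contribute.  */
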
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              edge_iterator ei;

              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}

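/* Editor's note (not in the original source): the driver above is the
   usual two-heap dataflow solver, structurally similar to the one in
   var-tracking.c.  In outline:

     while (!fibheap_empty (pending))
       {
         swap (pending, worklist);          // start a new round
         while (!fibheap_empty (worklist))
           propagate (fibheap_extract_min (worklist));
         if (!cfun->machine->rescan_vzeroupper_p)
           break;                           // fixed point reached
       }

   Keying both heaps by reverse completion order makes each round visit
   blocks roughly topologically, so most states settle in one pass.  */
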
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)

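/* Usage example (editor's note, not in the original source):
   MODE_INDEX (SImode) is 2, so an expression such as

     cost->mult_init[MODE_INDEX (SImode)]

   selects the SImode entry of the 5-element multiply (or divide) cost
   arrays in the tables below, and the trailing `: 4' maps every
   remaining mode to the "other" slot.  */
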
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}

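/* How the stringop tables below read (editor's note, not in the
   original source): each stringop_algs value is

     {alg-for-unknown-size, {{max, alg}, {max, alg}, ...}}

   For a known block size the first {max, alg} pair whose MAX is large
   enough (or -1, which terminates the list) decides the algorithm.  So

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   means: unknown size -> libcall, size <= 256 -> rep movsl, anything
   larger -> libcall.  Each table carries one such pair of entries per
   operation (32bit and 64bit variants); DUMMY_STRINGOP_ALGS fills the
   64bit slot of tables that were never tuned for 64bit.  */
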
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),          /* cost of an add instruction */
  COSTS_N_BYTES (3),          /* cost of a lea instruction */
  COSTS_N_BYTES (2),          /* variable shift costs */
  COSTS_N_BYTES (3),          /* constant shift costs */
  {COSTS_N_BYTES (3),         /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),         /* HI */
   COSTS_N_BYTES (3),         /* SI */
   COSTS_N_BYTES (3),         /* DI */
   COSTS_N_BYTES (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),         /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),         /* HI */
   COSTS_N_BYTES (3),         /* SI */
   COSTS_N_BYTES (3),         /* DI */
   COSTS_N_BYTES (5)},        /* other */
  COSTS_N_BYTES (3),          /* cost of movsx */
  COSTS_N_BYTES (3),          /* cost of movzx */
  0,                          /* "large" insn */
  2,                          /* cost for loading QImode using movzbl */
  {2, 2, 2},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 2, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {2, 2, 2},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {2, 2, 2},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  3,                          /* cost of moving MMX register */
  {3, 3},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {3, 3},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  3,                          /* cost of moving SSE register */
  {3, 3, 3},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {3, 3, 3},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  0,                          /* size of l1 cache  */
  0,                          /* size of l2 cache  */
  0,                          /* size of prefetch block */
  0,                          /* number of parallel prefetches */
  COSTS_N_BYTES (2),          /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),          /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),          /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),          /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),          /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),          /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar_load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  1,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  1,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {   /* 386 specific costs */
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (3),          /* variable shift costs */
  COSTS_N_INSNS (2),          /* constant shift costs */
  {COSTS_N_INSNS (6),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),         /* HI */
   COSTS_N_INSNS (6),         /* SI */
   COSTS_N_INSNS (6),         /* DI */
   COSTS_N_INSNS (6)},        /* other */
  COSTS_N_INSNS (1),          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),        /* HI */
   COSTS_N_INSNS (23),        /* SI */
   COSTS_N_INSNS (23),        /* DI */
   COSTS_N_INSNS (23)},       /* other */
  COSTS_N_INSNS (3),          /* cost of movsx */
  COSTS_N_INSNS (2),          /* cost of movzx */
  15,                         /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {2, 4, 2},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 4, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {8, 8, 8},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {8, 8, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {4, 8},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 8},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 8, 16},                 /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 8, 16},                 /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  0,                          /* size of l1 cache  */
  0,                          /* size of l2 cache  */
  0,                          /* size of prefetch block */
  0,                          /* number of parallel prefetches */
  COSTS_N_INSNS (23),         /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),         /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),         /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),         /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),        /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar_load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {   /* 486 specific costs */
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (3),          /* variable shift costs */
  COSTS_N_INSNS (2),          /* constant shift costs */
  {COSTS_N_INSNS (12),        /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),        /* HI */
   COSTS_N_INSNS (12),        /* SI */
   COSTS_N_INSNS (12),        /* DI */
   COSTS_N_INSNS (12)},       /* other */
  1,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),        /* HI */
   COSTS_N_INSNS (40),        /* SI */
   COSTS_N_INSNS (40),        /* DI */
   COSTS_N_INSNS (40)},       /* other */
  COSTS_N_INSNS (3),          /* cost of movsx */
  COSTS_N_INSNS (2),          /* cost of movzx */
  15,                         /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {2, 4, 2},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 4, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {8, 8, 8},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {8, 8, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {4, 8},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 8},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 8, 16},                 /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 8, 16},                 /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  4,                          /* size of l1 cache.  486 has 8kB cache
                                 shared for code and data, so 4kB is
                                 not really precise.  */
  4,                          /* size of l2 cache  */
  0,                          /* size of prefetch block */
  0,                          /* number of parallel prefetches */
  COSTS_N_INSNS (8),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),         /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),         /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar_load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (4),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (11),        /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),        /* HI */
   COSTS_N_INSNS (11),        /* SI */
   COSTS_N_INSNS (11),        /* DI */
   COSTS_N_INSNS (11)},       /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),        /* HI */
   COSTS_N_INSNS (25),        /* SI */
   COSTS_N_INSNS (25),        /* DI */
   COSTS_N_INSNS (25)},       /* other */
  COSTS_N_INSNS (3),          /* cost of movsx */
  COSTS_N_INSNS (2),          /* cost of movzx */
  8,                          /* "large" insn */
  6,                          /* cost for loading QImode using movzbl */
  {2, 4, 2},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 4, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {2, 2, 6},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 6},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  8,                          /* cost of moving MMX register */
  {8, 8},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {8, 8},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 8, 16},                 /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 8, 16},                 /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  8,                          /* size of l1 cache.  */
  8,                          /* size of l2 cache  */
  0,                          /* size of prefetch block */
  0,                          /* number of parallel prefetches */
  COSTS_N_INSNS (3),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),         /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar_load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (4),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (4),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (4)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),        /* HI */
   COSTS_N_INSNS (17),        /* SI */
   COSTS_N_INSNS (17),        /* DI */
   COSTS_N_INSNS (17)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  2,                          /* cost for loading QImode using movzbl */
  {4, 4, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 2, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {2, 2, 6},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 6},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {2, 2},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {2, 2},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {2, 2, 8},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {2, 2, 8},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  8,                          /* size of l1 cache.  */
  256,                        /* size of l2 cache  */
  32,                         /* size of prefetch block */
  6,                          /* number of parallel prefetches */
  COSTS_N_INSNS (3),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),         /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb has apparently more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar_load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (2),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (7),         /* SI */
   COSTS_N_INSNS (7),         /* DI */
   COSTS_N_INSNS (7)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),        /* HI */
   COSTS_N_INSNS (39),        /* SI */
   COSTS_N_INSNS (39),        /* DI */
   COSTS_N_INSNS (39)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  1,                          /* cost for loading QImode using movzbl */
  {1, 1, 1},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {1, 1, 1},                  /* cost of storing integer registers */
  1,                          /* cost of reg,reg fld/fst */
  {1, 1, 1},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 6, 6},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */

  1,                          /* cost of moving MMX register */
  {1, 1},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {1, 1},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  1,                          /* cost of moving SSE register */
  {1, 1, 1},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {1, 1, 1},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  1,                          /* MMX or SSE register to integer */
  64,                         /* size of l1 cache.  */
  128,                        /* size of l2 cache.  */
  32,                         /* size of prefetch block */
  1,                          /* number of parallel prefetches */
  COSTS_N_INSNS (6),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),         /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),         /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar_load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (3),         /* DI */
   COSTS_N_INSNS (3)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),        /* HI */
   COSTS_N_INSNS (18),        /* SI */
   COSTS_N_INSNS (18),        /* DI */
   COSTS_N_INSNS (18)},       /* other */
  COSTS_N_INSNS (2),          /* cost of movsx */
  COSTS_N_INSNS (2),          /* cost of movzx */
  8,                          /* "large" insn */
  3,                          /* cost for loading QImode using movzbl */
  {4, 5, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 3, 2},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {6, 6, 6},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 4},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {2, 2},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {2, 2},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {2, 2, 8},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {2, 2, 8},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  6,                          /* MMX or SSE register to integer */
  32,                         /* size of l1 cache.  */
  32,                         /* size of l2 cache.  Some models
                                 have integrated l2 cache, but
                                 optimizing for k6 is not important
                                 enough to worry about that.  */
  32,                         /* size of prefetch block */
  1,                          /* number of parallel prefetches */
  COSTS_N_INSNS (2),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),         /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar_load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (5),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),         /* HI */
   COSTS_N_INSNS (5),         /* SI */
   COSTS_N_INSNS (5),         /* DI */
   COSTS_N_INSNS (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),        /* HI */
   COSTS_N_INSNS (42),        /* SI */
   COSTS_N_INSNS (74),        /* DI */
   COSTS_N_INSNS (74)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {3, 4, 3},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {3, 4, 3},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {4, 4, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {4, 4},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 4, 6},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 5},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  5,                          /* MMX or SSE register to integer */
  64,                         /* size of l1 cache.  */
  256,                        /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  6,                          /* number of parallel prefetches */
  5,                          /* Branch cost */
  COSTS_N_INSNS (4),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),         /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar_load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),        /* HI */
   COSTS_N_INSNS (42),        /* SI */
   COSTS_N_INSNS (74),        /* DI */
   COSTS_N_INSNS (74)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {3, 4, 3},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {3, 4, 3},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {4, 4, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {3, 3},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 3, 6},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 5},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  5,                          /* MMX or SSE register to integer */
  64,                         /* size of l1 cache.  */
  512,                        /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                        /* number of parallel prefetches */
  3,                          /* Branch cost */
  COSTS_N_INSNS (4),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),         /* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                          /* scalar_stmt_cost.  */
  2,                          /* scalar_load_cost.  */
  2,                          /* scalar_store_cost.  */
  5,                          /* vec_stmt_cost.  */
  0,                          /* vec_to_scalar_cost.  */
  2,                          /* scalar_to_vec_cost.  */
  2,                          /* vec_align_load_cost.  */
  3,                          /* vec_unalign_load_cost.  */
  3,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  2,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),        /* HI */
   COSTS_N_INSNS (51),        /* SI */
   COSTS_N_INSNS (83),        /* DI */
   COSTS_N_INSNS (83)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {3, 4, 3},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {3, 4, 3},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {4, 4, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {3, 3},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 4, 3},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 5},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
                              /* On K8:
                                  MOVD reg64, xmmreg Double FSTORE 4
                                  MOVD reg32, xmmreg Double FSTORE 4
                                 On AMDFAM10:
                                  MOVD reg64, xmmreg Double FADD 3
                                                     1/1  1/1
                                  MOVD reg32, xmmreg Double FADD 3
                                                     1/1  1/1 */
  64,                         /* size of l1 cache.  */
  512,                        /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                        /* number of parallel prefetches */
  2,                          /* Branch cost */
  COSTS_N_INSNS (4),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),         /* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but
     for very small blocks it is better to use loop.  For large blocks,
     libcall can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                          /* scalar_stmt_cost.  */
  2,                          /* scalar_load_cost.  */
  2,                          /* scalar_store_cost.  */
  6,                          /* vec_stmt_cost.  */
  0,                          /* vec_to_scalar_cost.  */
  2,                          /* scalar_to_vec_cost.  */
  2,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  2,                          /* vec_store_cost.  */
  2,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (4),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (4),         /* SI */
   COSTS_N_INSNS (6),         /* DI */
   COSTS_N_INSNS (6)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),        /* HI */
   COSTS_N_INSNS (51),        /* SI */
   COSTS_N_INSNS (83),        /* DI */
   COSTS_N_INSNS (83)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {5, 5, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {4, 4, 4},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {5, 5, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {4, 4},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 4, 4},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 4},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  2,                          /* MMX or SSE register to integer */
                              /* On K8:
                                  MOVD reg64, xmmreg Double FSTORE 4
                                  MOVD reg32, xmmreg Double FSTORE 4
                                 On AMDFAM10:
                                  MOVD reg64, xmmreg Double FADD 3
                                                     1/1  1/1
                                  MOVD reg32, xmmreg Double FADD 3
                                                     1/1  1/1 */
  16,                         /* size of l1 cache.  */
  2048,                       /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                        /* number of parallel prefetches */
  2,                          /* Branch cost */
  COSTS_N_INSNS (6),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),         /* cost of FSQRT instruction.  */

  /* BDVER1 has an optimized REP instruction for medium sized blocks, but
     for very small blocks it is better to use loop.  For large blocks,
     libcall can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                          /* scalar_stmt_cost.  */
  4,                          /* scalar_load_cost.  */
  4,                          /* scalar_store_cost.  */
  6,                          /* vec_stmt_cost.  */
  0,                          /* vec_to_scalar_cost.  */
  2,                          /* scalar_to_vec_cost.  */
  4,                          /* vec_align_load_cost.  */
  4,                          /* vec_unalign_load_cost.  */
  4,                          /* vec_store_cost.  */
  2,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (4),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (4),         /* SI */
   COSTS_N_INSNS (6),         /* DI */
   COSTS_N_INSNS (6)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),        /* HI */
   COSTS_N_INSNS (51),        /* SI */
   COSTS_N_INSNS (83),        /* DI */
   COSTS_N_INSNS (83)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {5, 5, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {4, 4, 4},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {5, 5, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {4, 4},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 4, 4},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 4},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  2,                          /* MMX or SSE register to integer */
                              /* On K8:
                                  MOVD reg64, xmmreg Double FSTORE 4
                                  MOVD reg32, xmmreg Double FSTORE 4
                                 On AMDFAM10:
                                  MOVD reg64, xmmreg Double FADD 3
                                                     1/1  1/1
                                  MOVD reg32, xmmreg Double FADD 3
                                                     1/1  1/1 */
  16,                         /* size of l1 cache.  */
  2048,                       /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                        /* number of parallel prefetches */
  2,                          /* Branch cost */
  COSTS_N_INSNS (6),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),         /* cost of FSQRT instruction.  */

  /* BDVER2 has an optimized REP instruction for medium sized blocks, but
     for very small blocks it is better to use loop.  For large blocks,
     libcall can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                          /* scalar_stmt_cost.  */
  4,                          /* scalar_load_cost.  */
  4,                          /* scalar_store_cost.  */
  6,                          /* vec_stmt_cost.  */
  0,                          /* vec_to_scalar_cost.  */
  2,                          /* scalar_to_vec_cost.  */
  4,                          /* vec_align_load_cost.  */
  4,                          /* vec_unalign_load_cost.  */
  4,                          /* vec_store_cost.  */
  2,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),        /* HI */
   COSTS_N_INSNS (51),        /* SI */
   COSTS_N_INSNS (83),        /* DI */
   COSTS_N_INSNS (83)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {3, 4, 3},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {3, 4, 3},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {4, 4, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {3, 3},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 4, 3},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 5},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
                              /* On K8:
                                  MOVD reg64, xmmreg Double FSTORE 4
                                  MOVD reg32, xmmreg Double FSTORE 4
                                 On AMDFAM10:
                                  MOVD reg64, xmmreg Double FADD 3
                                                     1/1  1/1
                                  MOVD reg32, xmmreg Double FADD 3
                                                     1/1  1/1 */
  32,                         /* size of l1 cache.  */
  512,                        /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  100,                        /* number of parallel prefetches */
  2,                          /* Branch cost */
  COSTS_N_INSNS (4),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),         /* cost of FSQRT instruction.  */

  /* BTVER1 has an optimized REP instruction for medium sized blocks, but
     for very small blocks it is better to use loop.  For large blocks,
     libcall can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                          /* scalar_stmt_cost.  */
  2,                          /* scalar_load_cost.  */
  2,                          /* scalar_store_cost.  */
  6,                          /* vec_stmt_cost.  */
  0,                          /* vec_to_scalar_cost.  */
  2,                          /* scalar_to_vec_cost.  */
  2,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  2,                          /* vec_store_cost.  */
  2,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (3),          /* cost of a lea instruction */
  COSTS_N_INSNS (4),          /* variable shift costs */
  COSTS_N_INSNS (4),          /* constant shift costs */
  {COSTS_N_INSNS (15),        /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),        /* HI */
   COSTS_N_INSNS (15),        /* SI */
   COSTS_N_INSNS (15),        /* DI */
   COSTS_N_INSNS (15)},       /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),        /* HI */
   COSTS_N_INSNS (56),        /* SI */
   COSTS_N_INSNS (56),        /* DI */
   COSTS_N_INSNS (56)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  16,                         /* "large" insn */
  2,                          /* cost for loading QImode using movzbl */
  {4, 5, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 3, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {2, 2, 6},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 6},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {2, 2},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {2, 2},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  12,                         /* cost of moving SSE register */
  {12, 12, 12},               /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {2, 2, 8},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  10,                         /* MMX or SSE register to integer */
  8,                          /* size of l1 cache.  */
  256,                        /* size of l2 cache.  */
  64,                         /* size of prefetch block */
  6,                          /* number of parallel prefetches */
  2,                          /* Branch cost */
  COSTS_N_INSNS (5),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),         /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar_load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (10),        /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),        /* HI */
   COSTS_N_INSNS (10),        /* SI */
   COSTS_N_INSNS (10),        /* DI */
   COSTS_N_INSNS (10)},       /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),        /* HI */
   COSTS_N_INSNS (66),        /* SI */
   COSTS_N_INSNS (66),        /* DI */
   COSTS_N_INSNS (66)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  16,                         /* "large" insn */
  17,                         /* MOVE_RATIO */
  4,                          /* cost for loading QImode using movzbl */
  {4, 4, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {4, 4, 4},                  /* cost of storing integer registers */
  3,                          /* cost of reg,reg fld/fst */
  {12, 12, 12},               /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 4},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  6,                          /* cost of moving MMX register */
  {12, 12},                   /* cost of loading MMX registers
                                 in SImode and DImode */
  {12, 12},                   /* cost of storing MMX registers
                                 in SImode and DImode */
  6,                          /* cost of moving SSE register */
  {12, 12, 12},               /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {12, 12, 12},               /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  8,                          /* MMX or SSE register to integer */
  8,                          /* size of l1 cache.  */
  1024,                       /* size of l2 cache.  */
  128,                        /* size of prefetch block */
  8,                          /* number of parallel prefetches */
  1,                          /* Branch cost */
  COSTS_N_INSNS (6),          /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),          /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),         /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),          /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),          /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),         /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                          /* scalar_stmt_cost.  */
  1,                          /* scalar_load_cost.  */
  1,                          /* scalar_store_cost.  */
  1,                          /* vec_stmt_cost.  */
  1,                          /* vec_to_scalar_cost.  */
  1,                          /* scalar_to_vec_cost.  */
  1,                          /* vec_align_load_cost.  */
  2,                          /* vec_unalign_load_cost.  */
  1,                          /* vec_store_cost.  */
  3,                          /* cond_taken_branch_cost.  */
  1,                          /* cond_not_taken_branch_cost.  */
};

1656 struct processor_costs atom_cost = {
1657 COSTS_N_INSNS (1), /* cost of an add instruction */
1658 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1659 COSTS_N_INSNS (1), /* variable shift costs */
1660 COSTS_N_INSNS (1), /* constant shift costs */
1661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1662 COSTS_N_INSNS (4), /* HI */
1663 COSTS_N_INSNS (3), /* SI */
1664 COSTS_N_INSNS (4), /* DI */
1665 COSTS_N_INSNS (2)}, /* other */
1666 0, /* cost of multiply per each bit set */
1667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1668 COSTS_N_INSNS (26), /* HI */
1669 COSTS_N_INSNS (42), /* SI */
1670 COSTS_N_INSNS (74), /* DI */
1671 COSTS_N_INSNS (74)}, /* other */
1672 COSTS_N_INSNS (1), /* cost of movsx */
1673 COSTS_N_INSNS (1), /* cost of movzx */
1674 8, /* "large" insn */
1675 17, /* MOVE_RATIO */
1676 4, /* cost for loading QImode using movzbl */
1677 {4, 4, 4}, /* cost of loading integer registers
1678 in QImode, HImode and SImode.
1679 Relative to reg-reg move (2). */
1680 {4, 4, 4}, /* cost of storing integer registers */
1681 4, /* cost of reg,reg fld/fst */
1682 {12, 12, 12}, /* cost of loading fp registers
1683 in SFmode, DFmode and XFmode */
1684 {6, 6, 8}, /* cost of storing fp registers
1685 in SFmode, DFmode and XFmode */
1686 2, /* cost of moving MMX register */
1687 {8, 8}, /* cost of loading MMX registers
1688 in SImode and DImode */
1689 {8, 8}, /* cost of storing MMX registers
1690 in SImode and DImode */
1691 2, /* cost of moving SSE register */
1692 {8, 8, 8}, /* cost of loading SSE registers
1693 in SImode, DImode and TImode */
1694 {8, 8, 8}, /* cost of storing SSE registers
1695 in SImode, DImode and TImode */
1696 5, /* MMX or SSE register to integer */
1697 32, /* size of l1 cache. */
1698 256, /* size of l2 cache. */
1699 64, /* size of prefetch block */
1700 6, /* number of parallel prefetches */
1701 3, /* Branch cost */
1702 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1703 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1704 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1705 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1706 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1707 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1708 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1709 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1710 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1711 {{libcall, {{8, loop}, {15, unrolled_loop},
1712 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1713 {libcall, {{24, loop}, {32, unrolled_loop},
1714 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1715 1, /* scalar_stmt_cost. */
1716 1, /* scalar load_cost. */
1717 1, /* scalar_store_cost. */
1718 1, /* vec_stmt_cost. */
1719 1, /* vec_to_scalar_cost. */
1720 1, /* scalar_to_vec_cost. */
1721 1, /* vec_align_load_cost. */
1722 2, /* vec_unalign_load_cost. */
1723 1, /* vec_store_cost. */
1724 3, /* cond_taken_branch_cost. */
1725 1, /* cond_not_taken_branch_cost. */
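/* Reader's note: each {max, alg} pair in the stringop tables above is
   consulted left to right; the first entry whose max is >= the block
   size (-1 meaning unbounded) selects the algorithm.  A minimal sketch
   of that lookup, with a hypothetical helper name standing in for the
   real decision logic in decide_alg: */
#if 0
static enum stringop_alg
pick_stringop_alg (const struct stringop_algs *algs,
                   unsigned HOST_WIDE_INT size)
{
  unsigned int i;
  for (i = 0; i < ARRAY_SIZE (algs->size); i++)
    if (algs->size[i].max == -1
        || size <= (unsigned HOST_WIDE_INT) algs->size[i].max)
      return algs->size[i].alg;    /* first fitting entry wins */
  return libcall;                  /* fall back to the library call */
}
#endif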
1728 /* Generic64 should produce code tuned for Nocona and K8. */
1730 struct processor_costs generic64_cost = {
1731 COSTS_N_INSNS (1), /* cost of an add instruction */
1732 /* On all chips taken into consideration, lea is 2 cycles or more. With
1733 this cost, however, our current implementation of synth_mult results in
1734 the use of unnecessary temporary registers, causing regressions on
1735 several SPECfp benchmarks. */
1736 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1737 COSTS_N_INSNS (1), /* variable shift costs */
1738 COSTS_N_INSNS (1), /* constant shift costs */
1739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1740 COSTS_N_INSNS (4), /* HI */
1741 COSTS_N_INSNS (3), /* SI */
1742 COSTS_N_INSNS (4), /* DI */
1743 COSTS_N_INSNS (2)}, /* other */
1744 0, /* cost of multiply per each bit set */
1745 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1746 COSTS_N_INSNS (26), /* HI */
1747 COSTS_N_INSNS (42), /* SI */
1748 COSTS_N_INSNS (74), /* DI */
1749 COSTS_N_INSNS (74)}, /* other */
1750 COSTS_N_INSNS (1), /* cost of movsx */
1751 COSTS_N_INSNS (1), /* cost of movzx */
1752 8, /* "large" insn */
1753 17, /* MOVE_RATIO */
1754 4, /* cost for loading QImode using movzbl */
1755 {4, 4, 4}, /* cost of loading integer registers
1756 in QImode, HImode and SImode.
1757 Relative to reg-reg move (2). */
1758 {4, 4, 4}, /* cost of storing integer registers */
1759 4, /* cost of reg,reg fld/fst */
1760 {12, 12, 12}, /* cost of loading fp registers
1761 in SFmode, DFmode and XFmode */
1762 {6, 6, 8}, /* cost of storing fp registers
1763 in SFmode, DFmode and XFmode */
1764 2, /* cost of moving MMX register */
1765 {8, 8}, /* cost of loading MMX registers
1766 in SImode and DImode */
1767 {8, 8}, /* cost of storing MMX registers
1768 in SImode and DImode */
1769 2, /* cost of moving SSE register */
1770 {8, 8, 8}, /* cost of loading SSE registers
1771 in SImode, DImode and TImode */
1772 {8, 8, 8}, /* cost of storing SSE registers
1773 in SImode, DImode and TImode */
1774 5, /* MMX or SSE register to integer */
1775 32, /* size of l1 cache. */
1776 512, /* size of l2 cache. */
1777 64, /* size of prefetch block */
1778 6, /* number of parallel prefetches */
1779 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1780 value is increased to the perhaps more appropriate value of 5. */
1781 3, /* Branch cost */
1782 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1783 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1784 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1785 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1786 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1787 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1788 {DUMMY_STRINGOP_ALGS,
1789 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1790 {DUMMY_STRINGOP_ALGS,
1791 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1792 1, /* scalar_stmt_cost. */
1793 1, /* scalar load_cost. */
1794 1, /* scalar_store_cost. */
1795 1, /* vec_stmt_cost. */
1796 1, /* vec_to_scalar_cost. */
1797 1, /* scalar_to_vec_cost. */
1798 1, /* vec_align_load_cost. */
1799 2, /* vec_unalign_load_cost. */
1800 1, /* vec_store_cost. */
1801 3, /* cond_taken_branch_cost. */
1802 1, /* cond_not_taken_branch_cost. */
1805 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1806 Core 2, Athlon and K8. */
1808 struct processor_costs generic32_cost = {
1809 COSTS_N_INSNS (1), /* cost of an add instruction */
1810 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1811 COSTS_N_INSNS (1), /* variable shift costs */
1812 COSTS_N_INSNS (1), /* constant shift costs */
1813 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1814 COSTS_N_INSNS (4), /* HI */
1815 COSTS_N_INSNS (3), /* SI */
1816 COSTS_N_INSNS (4), /* DI */
1817 COSTS_N_INSNS (2)}, /* other */
1818 0, /* cost of multiply per each bit set */
1819 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1820 COSTS_N_INSNS (26), /* HI */
1821 COSTS_N_INSNS (42), /* SI */
1822 COSTS_N_INSNS (74), /* DI */
1823 COSTS_N_INSNS (74)}, /* other */
1824 COSTS_N_INSNS (1), /* cost of movsx */
1825 COSTS_N_INSNS (1), /* cost of movzx */
1826 8, /* "large" insn */
1827 17, /* MOVE_RATIO */
1828 4, /* cost for loading QImode using movzbl */
1829 {4, 4, 4}, /* cost of loading integer registers
1830 in QImode, HImode and SImode.
1831 Relative to reg-reg move (2). */
1832 {4, 4, 4}, /* cost of storing integer registers */
1833 4, /* cost of reg,reg fld/fst */
1834 {12, 12, 12}, /* cost of loading fp registers
1835 in SFmode, DFmode and XFmode */
1836 {6, 6, 8}, /* cost of storing fp registers
1837 in SFmode, DFmode and XFmode */
1838 2, /* cost of moving MMX register */
1839 {8, 8}, /* cost of loading MMX registers
1840 in SImode and DImode */
1841 {8, 8}, /* cost of storing MMX registers
1842 in SImode and DImode */
1843 2, /* cost of moving SSE register */
1844 {8, 8, 8}, /* cost of loading SSE registers
1845 in SImode, DImode and TImode */
1846 {8, 8, 8}, /* cost of storing SSE registers
1847 in SImode, DImode and TImode */
1848 5, /* MMX or SSE register to integer */
1849 32, /* size of l1 cache. */
1850 256, /* size of l2 cache. */
1851 64, /* size of prefetch block */
1852 6, /* number of parallel prefetches */
1853 3, /* Branch cost */
1854 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1855 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1856 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1857 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1858 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1859 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1860 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1861 DUMMY_STRINGOP_ALGS},
1862 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1863 DUMMY_STRINGOP_ALGS},
1864 1, /* scalar_stmt_cost. */
1865 1, /* scalar load_cost. */
1866 1, /* scalar_store_cost. */
1867 1, /* vec_stmt_cost. */
1868 1, /* vec_to_scalar_cost. */
1869 1, /* scalar_to_vec_cost. */
1870 1, /* vec_align_load_cost. */
1871 2, /* vec_unalign_load_cost. */
1872 1, /* vec_store_cost. */
1873 3, /* cond_taken_branch_cost. */
1874 1, /* cond_not_taken_branch_cost. */
1877 const struct processor_costs *ix86_cost = &pentium_cost;
1879 /* Processor feature/optimization bitmasks. */
1880 #define m_386 (1<<PROCESSOR_I386)
1881 #define m_486 (1<<PROCESSOR_I486)
1882 #define m_PENT (1<<PROCESSOR_PENTIUM)
1883 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1884 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1885 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1886 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1887 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1888 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1889 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1890 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1891 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1892 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1893 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1894 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1895 #define m_ATOM (1<<PROCESSOR_ATOM)
1897 #define m_GEODE (1<<PROCESSOR_GEODE)
1898 #define m_K6 (1<<PROCESSOR_K6)
1899 #define m_K6_GEODE (m_K6 | m_GEODE)
1900 #define m_K8 (1<<PROCESSOR_K8)
1901 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1902 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1903 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1904 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1905 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1906 #define m_BDVER (m_BDVER1 | m_BDVER2)
1907 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1908 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1910 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1911 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1913 /* Generic instruction choice should be a common subset of the supported
1914 CPUs (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1915 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1917 /* Feature tests against the various tunings. */
1918 unsigned char ix86_tune_features[X86_TUNE_LAST];
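/* Sketch of how this array is consumed: i386.h wraps each entry in a
   TARGET_* macro (TARGET_USE_LEAVE etc. -- names assumed from i386.h),
   and ix86_option_override_internal fills the array from the
   initializer below using the selected tuning's bit: */
#if 0
#define TARGET_USE_LEAVE ix86_tune_features[X86_TUNE_USE_LEAVE]
/* ix86_tune_features[i]
     = !!(initial_ix86_tune_features[i] & (1u << ix86_tune));  */
#endif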
1920 /* Feature tests against the various tunings used to create ix86_tune_features
1921 based on the processor mask. */
1922 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1923 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1924 negatively, so enabling it for Generic64 seems like a good code-size
1925 tradeoff. We can't enable it for 32-bit generic because it does not
1926 work well with PPro-based chips. */
1927 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1929 /* X86_TUNE_PUSH_MEMORY */
1930 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1932 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1935 /* X86_TUNE_UNROLL_STRLEN */
1936 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1938 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1939 on simulation results, but after P4 shipped no performance benefit
1940 was observed from branch hints, and they increase code size.
1941 As a result, icc never generates branch hints. */
1944 /* X86_TUNE_DOUBLE_WITH_ADD */
1947 /* X86_TUNE_USE_SAHF */
1948 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1950 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1951 partial dependencies. */
1952 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1954 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1955 register stalls on the Generic32 compilation setting as well. However,
1956 in the current implementation partial register stalls are not
1957 eliminated very well - they can be introduced via subregs synthesized
1958 by combine and can happen in caller/callee saving sequences. Because
1959 this option pays back little on PPro-based chips and conflicts with
1960 the partial register dependencies used by Athlon/P4-based chips, it is
1961 better to leave it off for generic32 for now. */
1964 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1965 m_CORE2I7 | m_GENERIC,
1967 /* X86_TUNE_USE_HIMODE_FIOP */
1968 m_386 | m_486 | m_K6_GEODE,
1970 /* X86_TUNE_USE_SIMODE_FIOP */
1971 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1973 /* X86_TUNE_USE_MOV0 */
1976 /* X86_TUNE_USE_CLTD */
1977 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1979 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1982 /* X86_TUNE_SPLIT_LONG_MOVES */
1985 /* X86_TUNE_READ_MODIFY_WRITE */
1988 /* X86_TUNE_READ_MODIFY */
1991 /* X86_TUNE_PROMOTE_QIMODE */
1992 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1994 /* X86_TUNE_FAST_PREFIX */
1995 ~(m_386 | m_486 | m_PENT),
1997 /* X86_TUNE_SINGLE_STRINGOP */
1998 m_386 | m_P4_NOCONA,
2000 /* X86_TUNE_QIMODE_MATH */
2003 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2004 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2005 might be considered for Generic32 if our scheme for avoiding partial
2006 stalls were more effective. */
2009 /* X86_TUNE_PROMOTE_QI_REGS */
2012 /* X86_TUNE_PROMOTE_HI_REGS */
2015 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2016 over esp addition. */
2017 m_386 | m_486 | m_PENT | m_PPRO,
2019 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2020 over esp addition. */
2023 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2024 over esp subtraction. */
2025 m_386 | m_486 | m_PENT | m_K6_GEODE,
2027 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
2028 over esp subtraction. */
2029 m_PENT | m_K6_GEODE,
2031 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2032 for DFmode copies */
2033 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2035 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2036 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2038 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2039 conflict here between PPro/Pentium4 based chips that treat 128bit
2040 SSE registers as single units and K8 based chips that divide SSE
2041 registers into two 64bit halves. This knob promotes all store destinations
2042 to be 128bit to allow register renaming on 128bit SSE units, but usually
2043 results in one extra microop on 64bit SSE units. Experimental results
2044 show that disabling this option on P4 brings over 20% SPECfp regression,
2045 while enabling it on K8 brings roughly 2.4% regression that can be partly
2046 masked by careful scheduling of moves. */
2047 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
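/* Illustration (assumed to match the scalar move patterns in i386.md):
   with this tuning a scalar reg-reg copy is emitted as the full-width
       movaps %xmm1, %xmm0
   rather than
       movss  %xmm1, %xmm0
   so the destination no longer depends on its stale upper bits.  */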
2049 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2050 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2052 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2055 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2058 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2059 are resolved on SSE register parts instead of whole registers, so we may
2060 maintain just the lower part of scalar values in the proper format, leaving
2061 the upper part undefined. */
2064 /* X86_TUNE_SSE_TYPELESS_STORES */
2067 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2068 m_PPRO | m_P4_NOCONA,
2070 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2071 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2073 /* X86_TUNE_PROLOGUE_USING_MOVE */
2074 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2076 /* X86_TUNE_EPILOGUE_USING_MOVE */
2077 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2079 /* X86_TUNE_SHIFT1 */
2082 /* X86_TUNE_USE_FFREEP */
2085 /* X86_TUNE_INTER_UNIT_MOVES */
2086 ~(m_AMD_MULTIPLE | m_GENERIC),
2088 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2089 ~(m_AMDFAM10 | m_BDVER),
2091 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2092 than 4 branch instructions in the 16 byte window. */
2093 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2095 /* X86_TUNE_SCHEDULE */
2096 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2098 /* X86_TUNE_USE_BT */
2099 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2101 /* X86_TUNE_USE_INCDEC */
2102 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2104 /* X86_TUNE_PAD_RETURNS */
2105 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2107 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2110 /* X86_TUNE_EXT_80387_CONSTANTS */
2111 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2113 /* X86_TUNE_SHORTEN_X87_SSE */
2116 /* X86_TUNE_AVOID_VECTOR_DECODE */
2117 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2119 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
2120 HImode and SImode multiply, but 386 and 486 do HImode multiply faster. */
2123 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2124 vector path on AMD machines. */
2125 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2127 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2128 machines. */
2129 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2131 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2132 than a MOV. */
2135 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2136 but one byte longer. */
2139 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2140 operand that cannot be represented using a modRM byte. The XOR
2141 replacement is long decoded, so this split helps here as well. */
2144 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2145 from FP to FP. */
2146 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2148 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2149 from integer to FP. */
2152 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2153 with a subsequent conditional jump instruction into a single
2154 compare-and-branch uop. */
2157 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2158 will impact LEA instruction selection. */
2161 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2162 instructions. */
2165 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2166 at -O3. For the moment, the prefetching seems badly tuned for Intel
2167 chips. */
2168 m_K6_GEODE | m_AMD_MULTIPLE,
2170 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2171 the auto-vectorizer. */
2174 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2175 during reassociation of integer computation. */
2178 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2179 during reassociation of fp computation. */
2183 /* Feature tests against the various architecture variations. */
2184 unsigned char ix86_arch_features[X86_ARCH_LAST];
2186 /* Feature tests against the various architecture variations, used to create
2187 ix86_arch_features based on the processor mask. */
2188 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2189 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2190 ~(m_386 | m_486 | m_PENT | m_K6),
2192 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2195 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2198 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2201 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2205 static const unsigned int x86_accumulate_outgoing_args
2206 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2208 static const unsigned int x86_arch_always_fancy_math_387
2209 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2211 static const unsigned int x86_avx256_split_unaligned_load
2212 = m_COREI7 | m_GENERIC;
2214 static const unsigned int x86_avx256_split_unaligned_store
2215 = m_COREI7 | m_BDVER | m_GENERIC;
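/* Sketch of the transformation these knobs enable (assumed to match
   ix86_avx256_split_vector_move_misalign): an unaligned 256-bit load
   is split into a 128-bit load plus vinsertf128, and a store into a
   128-bit store plus vextractf128, e.g.
       vmovups (%rax), %ymm0
   becomes
       vmovups (%rax), %xmm0
       vinsertf128 $1, 16(%rax), %ymm0, %ymm0  */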
2217 /* In case the average insn count for a single function invocation is
2218 lower than this constant, emit fast (but longer) prologue and
2219 epilogue code. */
2220 #define FAST_PROLOGUE_INSN_COUNT 20
2222 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2223 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2224 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2225 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2227 /* Array of the smallest class containing reg number REGNO, indexed by
2228 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2230 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2232 /* ax, dx, cx, bx */
2233 AREG, DREG, CREG, BREG,
2234 /* si, di, bp, sp */
2235 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2237 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2238 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2241 /* flags, fpsr, fpcr, frame */
2242 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2244 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2247 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2250 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2251 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2252 /* SSE REX registers */
2253 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2257 /* The "default" register map used in 32bit mode. */
2259 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2261 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2262 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2263 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2264 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2265 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2270 /* The "default" register map used in 64bit mode. */
2272 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2274 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2275 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2276 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2277 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2278 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2279 8,9,10,11,12,13,14,15, /* extended integer registers */
2280 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
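/* Example lookups (sketch, assuming the usual i386.h numbering where
   AX_REG == 0 and the first SSE register is hard reg 21): */
#if 0
  int dwarf_rax  = dbx64_register_map[AX_REG];  /* 0: %rax in DWARF   */
  int dwarf_xmm0 = dbx64_register_map[21];      /* 17: %xmm0 in DWARF */
#endif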
2283 /* Define the register numbers to be used in Dwarf debugging information.
2284 The SVR4 reference port C compiler uses the following register numbers
2285 in its Dwarf output code:
2286 0 for %eax (gcc regno = 0)
2287 1 for %ecx (gcc regno = 2)
2288 2 for %edx (gcc regno = 1)
2289 3 for %ebx (gcc regno = 3)
2290 4 for %esp (gcc regno = 7)
2291 5 for %ebp (gcc regno = 6)
2292 6 for %esi (gcc regno = 4)
2293 7 for %edi (gcc regno = 5)
2294 The following three DWARF register numbers are never generated by
2295 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2296 believes these numbers have these meanings.
2297 8 for %eip (no gcc equivalent)
2298 9 for %eflags (gcc regno = 17)
2299 10 for %trapno (no gcc equivalent)
2300 It is not at all clear how we should number the FP stack registers
2301 for the x86 architecture. If the version of SDB on x86/svr4 were
2302 a bit less brain dead with respect to floating-point then we would
2303 have a precedent to follow with respect to DWARF register numbers
2304 for x86 FP registers, but the SDB on x86/svr4 is so completely
2305 broken with respect to FP registers that it is hardly worth thinking
2306 of it as something to strive for compatibility with.
2307 The version of x86/svr4 SDB I have at the moment does (partially)
2308 seem to believe that DWARF register number 11 is associated with
2309 the x86 register %st(0), but that's about all. Higher DWARF
2310 register numbers don't seem to be associated with anything in
2311 particular, and even for DWARF regno 11, SDB only seems to
2312 understand that it should say that a variable lives in %st(0) (when
2313 asked via an `=' command) if we said it was in DWARF regno 11,
2314 but SDB still prints garbage when asked for the value of the
2315 variable in question (via a `/' command).
2316 (Also note that the labels SDB prints for various FP stack regs
2317 when doing an `x' command are all wrong.)
2318 Note that these problems generally don't affect the native SVR4
2319 C compiler because it doesn't allow the use of -O with -g and
2320 because when it is *not* optimizing, it allocates a memory
2321 location for each floating-point variable, and the memory
2322 location is what gets described in the DWARF AT_location
2323 attribute for the variable in question.
2324 Regardless of the severe mental illness of the x86/svr4 SDB, we
2325 do something sensible here and we use the following DWARF
2326 register numbers. Note that these are all stack-top-relative
2327 numbers:
2328 11 for %st(0) (gcc regno = 8)
2329 12 for %st(1) (gcc regno = 9)
2330 13 for %st(2) (gcc regno = 10)
2331 14 for %st(3) (gcc regno = 11)
2332 15 for %st(4) (gcc regno = 12)
2333 16 for %st(5) (gcc regno = 13)
2334 17 for %st(6) (gcc regno = 14)
2335 18 for %st(7) (gcc regno = 15)
2337 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2339 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2340 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2341 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2342 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2343 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2344 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2345 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2348 /* Define parameter passing and return registers. */
2350 static int const x86_64_int_parameter_registers[6] =
2352 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2355 static int const x86_64_ms_abi_int_parameter_registers[4] =
2357 CX_REG, DX_REG, R8_REG, R9_REG
2360 static int const x86_64_int_return_registers[4] =
2362 AX_REG, DX_REG, DI_REG, SI_REG
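/* ABI illustration (not compiler code): for a SysV x86-64 call such as
   the hypothetical prototype below, a1..a6 are passed in rdi, rsi,
   rdx, rcx, r8 and r9 per x86_64_int_parameter_registers, and the
   result comes back in rax. */
#if 0
extern long callee (long a1, long a2, long a3,
                    long a4, long a5, long a6);
#endif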
2365 /* Define the structure for the machine field in struct function. */
2367 struct GTY(()) stack_local_entry {
2368 unsigned short mode;
2371 struct stack_local_entry *next;
2374 /* Structure describing stack frame layout.
2375 Stack grows downward through, in order:
2381 the saved static chain if ix86_static_chain_on_stack;
2383 the saved frame pointer if frame_pointer_needed <- HARD_FRAME_POINTER;
2390 the register and SSE register save areas <- sse_regs_save_offset;
2393 [va_arg registers] and [padding2], which together = to_allocate. */
2406 int outgoing_arguments_size;
2407 HOST_WIDE_INT frame;
2409 /* The offsets relative to ARG_POINTER. */
2410 HOST_WIDE_INT frame_pointer_offset;
2411 HOST_WIDE_INT hard_frame_pointer_offset;
2412 HOST_WIDE_INT stack_pointer_offset;
2413 HOST_WIDE_INT hfp_save_offset;
2414 HOST_WIDE_INT reg_save_offset;
2415 HOST_WIDE_INT sse_reg_save_offset;
2417 /* When save_regs_using_mov is set, emit prologue using
2418 move instead of push instructions. */
2419 bool save_regs_using_mov;
2422 /* Which cpu are we scheduling for. */
2423 enum attr_cpu ix86_schedule;
2425 /* Which cpu are we optimizing for. */
2426 enum processor_type ix86_tune;
2428 /* Which instruction set architecture to use. */
2429 enum processor_type ix86_arch;
2431 /* True if processor has SSE prefetch instruction. */
2432 int x86_prefetch_sse;
2434 /* True if processor has prefetchw instruction. */
2437 /* -mstackrealign option */
2438 static const char ix86_force_align_arg_pointer_string[]
2439 = "force_align_arg_pointer";
2441 static rtx (*ix86_gen_leave) (void);
2442 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2443 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2444 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2445 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2446 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2447 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2448 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2449 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2450 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2452 /* Preferred alignment for stack boundary in bits. */
2453 unsigned int ix86_preferred_stack_boundary;
2455 /* Alignment for incoming stack boundary in bits specified at
2456 command line. */
2457 static unsigned int ix86_user_incoming_stack_boundary;
2459 /* Default alignment for incoming stack boundary in bits. */
2460 static unsigned int ix86_default_incoming_stack_boundary;
2462 /* Alignment for incoming stack boundary in bits. */
2463 unsigned int ix86_incoming_stack_boundary;
2465 /* Calling abi specific va_list type nodes. */
2466 static GTY(()) tree sysv_va_list_type_node;
2467 static GTY(()) tree ms_va_list_type_node;
2469 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2470 char internal_label_prefix[16];
2471 int internal_label_prefix_len;
2473 /* Fence to use after loop using movnt. */
2476 /* Register class used for passing a given 64bit part of the argument.
2477 These represent classes as documented by the psABI, with the exception
2478 of the SSESF and SSEDF classes, which are basically the SSE class:
2479 gcc just uses an SF or DFmode move instead of DImode to avoid
2481 reformatting penalties. Similarly we play games with INTEGERSI_CLASS
2482 to use cheaper SImode moves whenever possible (the upper half is padding). */
2483 enum x86_64_reg_class
2486 X86_64_INTEGER_CLASS,
2487 X86_64_INTEGERSI_CLASS,
2494 X86_64_COMPLEX_X87_CLASS,
2498 #define MAX_CLASSES 4
2500 /* Table of constants used by fldpi, fldln2, etc.... */
2501 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2502 static bool ext_80387_constants_init = 0;
2505 static struct machine_function * ix86_init_machine_status (void);
2506 static rtx ix86_function_value (const_tree, const_tree, bool);
2507 static bool ix86_function_value_regno_p (const unsigned int);
2508 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2510 static rtx ix86_static_chain (const_tree, bool);
2511 static int ix86_function_regparm (const_tree, const_tree);
2512 static void ix86_compute_frame_layout (struct ix86_frame *);
2513 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2515 static void ix86_add_new_builtins (HOST_WIDE_INT);
2516 static tree ix86_canonical_va_list_type (tree);
2517 static void predict_jump (int);
2518 static unsigned int split_stack_prologue_scratch_regno (void);
2519 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2521 enum ix86_function_specific_strings
2523 IX86_FUNCTION_SPECIFIC_ARCH,
2524 IX86_FUNCTION_SPECIFIC_TUNE,
2525 IX86_FUNCTION_SPECIFIC_MAX
2528 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2529 const char *, enum fpmath_unit, bool);
2530 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2531 static void ix86_function_specific_save (struct cl_target_option *);
2532 static void ix86_function_specific_restore (struct cl_target_option *);
2533 static void ix86_function_specific_print (FILE *, int,
2534 struct cl_target_option *);
2535 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2536 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2537 struct gcc_options *);
2538 static bool ix86_can_inline_p (tree, tree);
2539 static void ix86_set_current_function (tree);
2540 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2542 static enum calling_abi ix86_function_abi (const_tree);
2545 #ifndef SUBTARGET32_DEFAULT_CPU
2546 #define SUBTARGET32_DEFAULT_CPU "i386"
2549 /* The svr4 ABI for the i386 says that records and unions are returned
2550 in memory. */
2551 #ifndef DEFAULT_PCC_STRUCT_RETURN
2552 #define DEFAULT_PCC_STRUCT_RETURN 1
2555 /* Whether -mtune= or -march= were specified */
2556 static int ix86_tune_defaulted;
2557 static int ix86_arch_specified;
2559 /* Vectorization library interface and handlers. */
2560 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2562 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2563 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2565 /* Processor target table, indexed by processor number */
2568 const struct processor_costs *cost; /* Processor costs */
2569 const int align_loop; /* Default alignments. */
2570 const int align_loop_max_skip;
2571 const int align_jump;
2572 const int align_jump_max_skip;
2573 const int align_func;
2576 static const struct ptt processor_target_table[PROCESSOR_max] =
2578 {&i386_cost, 4, 3, 4, 3, 4},
2579 {&i486_cost, 16, 15, 16, 15, 16},
2580 {&pentium_cost, 16, 7, 16, 7, 16},
2581 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2582 {&geode_cost, 0, 0, 0, 0, 0},
2583 {&k6_cost, 32, 7, 32, 7, 32},
2584 {&athlon_cost, 16, 7, 16, 7, 16},
2585 {&pentium4_cost, 0, 0, 0, 0, 0},
2586 {&k8_cost, 16, 7, 16, 7, 16},
2587 {&nocona_cost, 0, 0, 0, 0, 0},
2588 /* Core 2 32-bit. */
2589 {&generic32_cost, 16, 10, 16, 10, 16},
2590 /* Core 2 64-bit. */
2591 {&generic64_cost, 16, 10, 16, 10, 16},
2592 /* Core i7 32-bit. */
2593 {&generic32_cost, 16, 10, 16, 10, 16},
2594 /* Core i7 64-bit. */
2595 {&generic64_cost, 16, 10, 16, 10, 16},
2596 {&generic32_cost, 16, 7, 16, 7, 16},
2597 {&generic64_cost, 16, 10, 16, 10, 16},
2598 {&amdfam10_cost, 32, 24, 32, 7, 32},
2599 {&bdver1_cost, 32, 24, 32, 7, 32},
2600 {&bdver2_cost, 32, 24, 32, 7, 32},
2601 {&btver1_cost, 32, 24, 32, 7, 32},
2602 {&atom_cost, 16, 15, 16, 7, 16}
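/* Sketch: once -mtune is resolved, the override code reads the
   selected row, along the lines of the "Default align_*" handling
   later in this file: */
#if 0
  if (align_loops == 0)
    align_loops = processor_target_table[ix86_tune].align_loop;
  if (align_jumps == 0)
    align_jumps = processor_target_table[ix86_tune].align_jump;
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;
#endif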
2605 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2635 /* Return true if a red-zone is in use. */
2638 ix86_using_red_zone (void)
2640 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
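/* Illustration: the red zone is the 128-byte area below %rsp that the
   SysV x86-64 ABI guarantees signal handlers will not clobber, so a
   leaf function may spill there without adjusting the stack pointer:
       movq %rdi, -8(%rsp)
   The MS ABI makes no such guarantee, hence the check above.  */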
2643 /* Return a string that documents the current -m options. The caller is
2644 responsible for freeing the string. */
2647 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2648 const char *tune, enum fpmath_unit fpmath,
2651 struct ix86_target_opts
2653 const char *option; /* option string */
2654 HOST_WIDE_INT mask; /* isa mask options */
2657 /* This table is ordered so that options like -msse4.2 that imply
2658 preceding options are matched first. */
2659 static struct ix86_target_opts isa_opts[] =
2661 { "-m64", OPTION_MASK_ISA_64BIT },
2662 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2663 { "-mfma", OPTION_MASK_ISA_FMA },
2664 { "-mxop", OPTION_MASK_ISA_XOP },
2665 { "-mlwp", OPTION_MASK_ISA_LWP },
2666 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2667 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2668 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2669 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2670 { "-msse3", OPTION_MASK_ISA_SSE3 },
2671 { "-msse2", OPTION_MASK_ISA_SSE2 },
2672 { "-msse", OPTION_MASK_ISA_SSE },
2673 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2674 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2675 { "-mmmx", OPTION_MASK_ISA_MMX },
2676 { "-mabm", OPTION_MASK_ISA_ABM },
2677 { "-mbmi", OPTION_MASK_ISA_BMI },
2678 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2679 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2680 { "-mtbm", OPTION_MASK_ISA_TBM },
2681 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2682 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2683 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2684 { "-maes", OPTION_MASK_ISA_AES },
2685 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2686 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2687 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2688 { "-mf16c", OPTION_MASK_ISA_F16C },
2692 static struct ix86_target_opts flag_opts[] =
2694 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2695 { "-m80387", MASK_80387 },
2696 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2697 { "-malign-double", MASK_ALIGN_DOUBLE },
2698 { "-mcld", MASK_CLD },
2699 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2700 { "-mieee-fp", MASK_IEEE_FP },
2701 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2702 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2703 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2704 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2705 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2706 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2707 { "-mno-red-zone", MASK_NO_RED_ZONE },
2708 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2709 { "-mrecip", MASK_RECIP },
2710 { "-mrtd", MASK_RTD },
2711 { "-msseregparm", MASK_SSEREGPARM },
2712 { "-mstack-arg-probe", MASK_STACK_PROBE },
2713 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2714 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2715 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2716 { "-mvzeroupper", MASK_VZEROUPPER },
2717 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2718 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2719 { "-mprefer-avx128", MASK_PREFER_AVX128},
2722 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2725 char target_other[40];
2734 memset (opts, '\0', sizeof (opts));
2736 /* Add -march= option. */
2739 opts[num][0] = "-march=";
2740 opts[num++][1] = arch;
2743 /* Add -mtune= option. */
2746 opts[num][0] = "-mtune=";
2747 opts[num++][1] = tune;
2750 /* Pick out the ISA options. */
2751 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2753 if ((isa & isa_opts[i].mask) != 0)
2755 opts[num++][0] = isa_opts[i].option;
2756 isa &= ~ isa_opts[i].mask;
2760 if (isa && add_nl_p)
2762 opts[num++][0] = isa_other;
2763 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2767 /* Add flag options. */
2768 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2770 if ((flags & flag_opts[i].mask) != 0)
2772 opts[num++][0] = flag_opts[i].option;
2773 flags &= ~ flag_opts[i].mask;
2777 if (flags && add_nl_p)
2779 opts[num++][0] = target_other;
2780 sprintf (target_other, "(other flags: %#x)", flags);
2783 /* Add -fpmath= option. */
2786 opts[num][0] = "-mfpmath=";
2787 switch ((int) fpmath)
2790 opts[num++][1] = "387";
2794 opts[num++][1] = "sse";
2797 case FPMATH_387 | FPMATH_SSE:
2798 opts[num++][1] = "sse+387";
2810 gcc_assert (num < ARRAY_SIZE (opts));
2812 /* Size the string. */
2814 sep_len = (add_nl_p) ? 3 : 1;
2815 for (i = 0; i < num; i++)
2818 for (j = 0; j < 2; j++)
2820 len += strlen (opts[i][j]);
2823 /* Build the string. */
2824 ret = ptr = (char *) xmalloc (len);
2827 for (i = 0; i < num; i++)
2831 for (j = 0; j < 2; j++)
2832 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2839 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2847 for (j = 0; j < 2; j++)
2850 memcpy (ptr, opts[i][j], len2[j]);
2852 line_len += len2[j];
2857 gcc_assert (ret + len >= ptr);
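/* Usage sketch: the returned buffer is heap-allocated, so callers must
   free it, as ix86_debug_options below does.  A hypothetical caller: */
#if 0
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
                                   ix86_arch_string, ix86_tune_string,
                                   ix86_fpmath, true);
  if (opts)
    {
      fprintf (stderr, "%s\n", opts);
      free (opts);
    }
#endif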
2862 /* Return true if profiling code should be emitted before the
2863 prologue; otherwise return false.
2864 Note: for x86 with "hotfix" this case is diagnosed with a sorry. */
2866 ix86_profile_before_prologue (void)
2868 return flag_fentry != 0;
2871 /* Function that is callable from the debugger to print the current
2872 options. */
2874 ix86_debug_options (void)
2876 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2877 ix86_arch_string, ix86_tune_string,
2882 fprintf (stderr, "%s\n\n", opts);
2886 fputs ("<no options>\n\n", stderr);
2891 /* Override various settings based on options. If MAIN_ARGS_P, the
2892 options are from the command line, otherwise they are from
2893 attribute(target). */
2896 ix86_option_override_internal (bool main_args_p)
2899 unsigned int ix86_arch_mask, ix86_tune_mask;
2900 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2905 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2906 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2907 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2908 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2909 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2910 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2911 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2912 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2913 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2914 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2915 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2916 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2917 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2918 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2919 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2920 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2921 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2922 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2923 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2924 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2925 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2926 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2927 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2928 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2929 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2930 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2931 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2932 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2933 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2934 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2935 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2936 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2937 #define PTA_PREFETCHW (HOST_WIDE_INT_1 << 32)
2939 /* if this reaches 64, need to widen struct pta flags below */
2943 const char *const name; /* processor name or nickname. */
2944 const enum processor_type processor;
2945 const enum attr_cpu schedule;
2946 const unsigned HOST_WIDE_INT flags;
2948 const processor_alias_table[] =
2950 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2951 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2952 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2953 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2954 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2955 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2956 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2957 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2958 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2959 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2960 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2961 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2962 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2964 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2966 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2967 PTA_MMX | PTA_SSE | PTA_SSE2},
2968 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2969 PTA_MMX |PTA_SSE | PTA_SSE2},
2970 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2971 PTA_MMX | PTA_SSE | PTA_SSE2},
2972 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2973 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2974 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2975 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2976 | PTA_CX16 | PTA_NO_SAHF},
2977 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2978 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2979 | PTA_SSSE3 | PTA_CX16},
2980 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2981 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2982 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2983 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2984 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2985 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2986 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2987 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2988 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2989 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2990 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2991 | PTA_RDRND | PTA_F16C},
2992 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
2993 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2994 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
2995 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2996 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
2997 | PTA_FMA | PTA_MOVBE},
2998 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2999 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3000 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3001 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3002 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3003 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3004 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3005 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3006 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3007 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3008 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3009 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3010 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3011 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3012 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3013 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3014 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3015 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3016 {"x86-64", PROCESSOR_K8, CPU_K8,
3017 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3018 {"k8", PROCESSOR_K8, CPU_K8,
3019 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3020 | PTA_SSE2 | PTA_NO_SAHF},
3021 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3022 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3023 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3024 {"opteron", PROCESSOR_K8, CPU_K8,
3025 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3026 | PTA_SSE2 | PTA_NO_SAHF},
3027 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3028 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3029 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3030 {"athlon64", PROCESSOR_K8, CPU_K8,
3031 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3032 | PTA_SSE2 | PTA_NO_SAHF},
3033 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3034 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3035 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3036 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3037 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3038 | PTA_SSE2 | PTA_NO_SAHF},
3039 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3040 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3041 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3042 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3043 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3044 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3045 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3046 PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
3047 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
3048 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3049 | PTA_FMA4 | PTA_XOP | PTA_LWP},
3050 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3051 PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
3052 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
3053 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3054 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3056 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3057 PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
3058 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3059 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3060 0 /* flags are only used for -march switch. */ },
3061 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3062 PTA_64BIT /* flags are only used for -march switch. */ },
3065 /* -mrecip options. */
3068 const char *string; /* option name */
3069 unsigned int mask; /* mask bits to set */
3071 const recip_options[] =
3073 { "all", RECIP_MASK_ALL },
3074 { "none", RECIP_MASK_NONE },
3075 { "div", RECIP_MASK_DIV },
3076 { "sqrt", RECIP_MASK_SQRT },
3077 { "vec-div", RECIP_MASK_VEC_DIV },
3078 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3081 int const pta_size = ARRAY_SIZE (processor_alias_table);
3083 /* Set up prefix/suffix so the error messages refer to either the command
3084 line argument, or the attribute(target). */
3093 prefix = "option(\"";
3098 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3099 SUBTARGET_OVERRIDE_OPTIONS;
3102 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3103 SUBSUBTARGET_OVERRIDE_OPTIONS;
3107 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3109 /* -fPIC is the default for x86_64. */
3110 if (TARGET_MACHO && TARGET_64BIT)
3113 /* Need to check -mtune=generic first. */
3114 if (ix86_tune_string)
3116 if (!strcmp (ix86_tune_string, "generic")
3117 || !strcmp (ix86_tune_string, "i686")
3118 /* As special support for cross compilers we read -mtune=native
3119 as -mtune=generic. With native compilers we won't see the
3120 -mtune=native, as it was changed by the driver. */
3121 || !strcmp (ix86_tune_string, "native"))
3124 ix86_tune_string = "generic64";
3126 ix86_tune_string = "generic32";
3128 /* If this call is for setting the option attribute, allow the
3129 generic32/generic64 that was previously set. */
3130 else if (!main_args_p
3131 && (!strcmp (ix86_tune_string, "generic32")
3132 || !strcmp (ix86_tune_string, "generic64")))
3134 else if (!strncmp (ix86_tune_string, "generic", 7))
3135 error ("bad value (%s) for %stune=%s %s",
3136 ix86_tune_string, prefix, suffix, sw);
3137 else if (!strcmp (ix86_tune_string, "x86-64"))
3138 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3139 "%stune=k8%s or %stune=generic%s instead as appropriate",
3140 prefix, suffix, prefix, suffix, prefix, suffix);
3144 if (ix86_arch_string)
3145 ix86_tune_string = ix86_arch_string;
3146 if (!ix86_tune_string)
3148 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3149 ix86_tune_defaulted = 1;
3152 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3153 need to use a sensible tune option. */
3154 if (!strcmp (ix86_tune_string, "generic")
3155 || !strcmp (ix86_tune_string, "x86-64")
3156 || !strcmp (ix86_tune_string, "i686"))
3159 ix86_tune_string = "generic64";
3161 ix86_tune_string = "generic32";
3165 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3167 /* rep; movq isn't available in 32-bit code. */
3168 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3169 ix86_stringop_alg = no_stringop;
3172 if (!ix86_arch_string)
3173 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3175 ix86_arch_specified = 1;
3177 if (!global_options_set.x_ix86_abi)
3178 ix86_abi = DEFAULT_ABI;
3180 if (global_options_set.x_ix86_cmodel)
3182 switch (ix86_cmodel)
3187 ix86_cmodel = CM_SMALL_PIC;
3189 error ("code model %qs not supported in the %s bit mode",
3196 ix86_cmodel = CM_MEDIUM_PIC;
3198 error ("code model %qs not supported in the %s bit mode",
3200 else if (TARGET_X32)
3201 error ("code model %qs not supported in x32 mode",
3208 ix86_cmodel = CM_LARGE_PIC;
3210 error ("code model %qs not supported in the %s bit mode",
3212 else if (TARGET_X32)
3213 error ("code model %qs not supported in x32 mode",
3219 error ("code model %s does not support PIC mode", "32");
3221 error ("code model %qs not supported in the %s bit mode",
3228 error ("code model %s does not support PIC mode", "kernel");
3229 ix86_cmodel = CM_32;
3232 error ("code model %qs not supported in the %s bit mode",
3242 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3243 use of rip-relative addressing. This eliminates fixups that
3244 would otherwise be needed if this object is to be placed in a
3245 DLL, and is essentially just as efficient as direct addressing. */
3246 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3247 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3248 else if (TARGET_64BIT)
3249 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3251 ix86_cmodel = CM_32;
3253 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3255 error ("-masm=intel not supported in this configuration");
3256 ix86_asm_dialect = ASM_ATT;
3258 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3259 sorry ("%i-bit mode not compiled in",
3260 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3262 for (i = 0; i < pta_size; i++)
3263 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3265 ix86_schedule = processor_alias_table[i].schedule;
3266 ix86_arch = processor_alias_table[i].processor;
3267 /* Default cpu tuning to the architecture. */
3268 ix86_tune = ix86_arch;
3270 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3271 error ("CPU you selected does not support x86-64 "
3274 if (processor_alias_table[i].flags & PTA_MMX
3275 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3276 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3277 if (processor_alias_table[i].flags & PTA_3DNOW
3278 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3279 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3280 if (processor_alias_table[i].flags & PTA_3DNOW_A
3281 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3282 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3283 if (processor_alias_table[i].flags & PTA_SSE
3284 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3285 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3286 if (processor_alias_table[i].flags & PTA_SSE2
3287 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3288 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3289 if (processor_alias_table[i].flags & PTA_SSE3
3290 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3291 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3292 if (processor_alias_table[i].flags & PTA_SSSE3
3293 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3294 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3295 if (processor_alias_table[i].flags & PTA_SSE4_1
3296 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3297 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3298 if (processor_alias_table[i].flags & PTA_SSE4_2
3299 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;
	if (processor_alias_table[i].flags & PTA_PREFETCHW)
	  x86_prefetchw = true;

	break;
      }

  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   ix86_arch_string, prefix, suffix, sw);
  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT)
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	else
	  {
	    /* Adjust tuning when compiling for 32-bit ABI.  */
	    switch (ix86_tune)
	      {
	      case PROCESSOR_GENERIC64:
		ix86_tune = PROCESSOR_GENERIC32;
		ix86_schedule = CPU_PENTIUMPRO;
		break;

	      case PROCESSOR_CORE2_64:
		ix86_tune = PROCESSOR_CORE2_32;
		break;

	      case PROCESSOR_COREI7_64:
		ix86_tune = PROCESSOR_COREI7_32;
		break;

	      default:
		break;
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }

  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);
  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif
  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (global_options_set.x_ix86_regparm)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 ix86_regparm, REGPARM_MAX);
	  ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;

  /* Provide default for -mbranch-cost= value.  */
  if (!global_options_set.x_ix86_branch_cost)
    ix86_branch_cost = ix86_cost->branch_cost;
  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use it when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;
  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;

  /* Turn on lzcnt instruction for -mabm.  */
  if (TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (global_options_set.x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT ? 4 : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (ix86_preferred_stack_boundary_arg < min
	  || ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }
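  /* A worked example of the mapping above: -mpreferred-stack-boundary=4
     yields (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. the familiar
     16-byte stack alignment, while the 32-bit minimum of 2 yields only
     4-byte alignment.  */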
  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (global_options_set.x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);

  if (global_options_set.x_ix86_fpmath)
    {
      if (ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  else
    ix86_fpmath = TARGET_FPMATH_DEFAULT;

  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;
  /* Use an external vectorized library when vectorizing intrinsics.  */
  if (global_options_set.x_ix86_veclibabi_type)
    switch (ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When a scheduling description is not available, disable the scheduler
     passes so they won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Enable software prefetching at -O3 for CPUs where prefetching is
     helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    flag_prefetch_loop_arrays = 1;

  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_monitor = gen_sse3_monitor64;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
    }
  else
    {
      ix86_gen_leave = gen_leave;
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_monitor = gen_sse3_monitor;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
    }
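  /* These hooks let mode-independent code later in this file emit the
     insn matching the active word size; e.g. a caller can write
     (illustrative sketch -- the real uses are in the prologue/epilogue
     expanders):

	 emit_insn (ix86_gen_sub3 (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (16)));

     and get subdi3 under -m64 but subsi3 under -m32 without testing
     TARGET_64BIT at every call site.  */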
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;

  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      flag_fentry = 1;
    }
  else if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }
  /* When not optimizing for size, enable the vzeroupper optimization for
     TARGET_AVX with -fexpensive-optimizations and split 32-byte
     AVX unaligned load/store.  */
  if (!optimize_size)
    {
      if (flag_expensive_optimizations
	  && !(target_flags_explicit & MASK_VZEROUPPER))
	target_flags |= MASK_VZEROUPPER;
      if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	  && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
      if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	  && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
      /* Enable 128-bit AVX instruction generation for the auto-vectorizer.  */
      if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
	target_flags |= MASK_PREFER_AVX128;
    }

  if (!TARGET_AVX)
    {
      /* Disable the vzeroupper pass if TARGET_AVX is disabled.  */
      target_flags &= ~MASK_VZEROUPPER;
    }
  if (ix86_recip_name)
    {
      char *p = ASTRDUP (ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  recip_mask_explicit |= mask;
	  if (invert)
	    recip_mask &= ~mask;
	  else
	    recip_mask |= mask;
	}
    }

  if (TARGET_RECIP)
    recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
  else if (target_flags_explicit & MASK_RECIP)
    recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
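  /* Illustrative example of the parsing above: -mrecip=default,!sqrt
     walks the comma-separated list left to right, first setting every
     bit in recip_mask for "default" and then clearing the sqrt bit
     again because of the '!' prefix (assuming a "sqrt" entry exists in
     recip_options).  */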
  /* Save the initial options in case the user does function specific
     options.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node ();
}
/* Return TRUE if VAL is passed in register with 256bit AVX modes.  */

static bool
function_pass_avx256_p (const_rtx val)
{
  if (!val)
    return false;

  if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
    return true;

  if (GET_CODE (val) == PARALLEL)
    {
      int i;
      rtx r;

      for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
	{
	  r = XVECEXP (val, 0, i);
	  if (GET_CODE (r) == EXPR_LIST
	      && XEXP (r, 0)
	      && REG_P (XEXP (r, 0))
	      && (GET_MODE (XEXP (r, 0)) == OImode
		  || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
	    return true;
	}
    }

  return false;
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true);
}
/* Update register usage after having seen the compiler flags.  */

void
ix86_conditional_register_usage (void)
{
  int i;
  unsigned int j;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (fixed_regs[i] > 1)
	fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
	call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
    }

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* The 64-bit MS_ABI changes the set of call-used registers.  */
  if (TARGET_64BIT_MS_ABI)
    {
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	call_used_regs[i] = 0;
    }

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  */
  if (TARGET_64BIT)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	    && call_used_regs[i])
	  SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	reg_names[i] = "";
    }
}
/* Save the current options.  */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;
  ptr->x_recip_mask_explicit = recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options.  */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;
  recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed.  */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests.  */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}
/* Print the current options.  */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))): take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }
  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;		/* option string */
    size_t len;			/* length of option string */
    enum ix86_opt_type type;	/* type of option */
    int opt;			/* option number */
    int mask;			/* mask to set or unset */
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };
  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;

      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}

      /* Process the option.  */
      if (i == ARRAY_SIZE (attrs))
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
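/* As an illustration of the loop above: user code such as

     __attribute__((target("no-sse4.2,arch=core2"))) void f (void);

   reaches this function as the string "no-sse4.2,arch=core2".  It is
   split at the comma, the "no-" prefix flips opt_set_p for the
   ix86_opt_isa entry, and "arch=" is stored in p_strings as a string
   option for later processing.  */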
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  int i;
  tree t = NULL_TREE;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return NULL_TREE;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings.  */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct cl_target_option cur_target;
  bool ret = true;
  tree old_optimize = build_optimization_node ();
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  new_target = ix86_valid_target_attribute_tree (args);
  new_optimize = build_optimization_node ();

  if (!new_target)
    ret = false;

  else if (fndecl)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does, then it is not ok
     to inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
	 function can inline an SSE2 function but an SSE2 function can't
	 inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
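/* For instance (illustrative), given

     __attribute__((target("sse4.2"))) int callee (void);
     int caller (void);	  -- compiled with only -msse2

   callee's ISA flags are not a subset of caller's, so the subset test
   above rejects inlining callee into caller; the reverse direction
   would be allowed.  */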
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
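/* Example (illustrative): under -mcmodel=medium with the default
   -mlarge-data-threshold=65536, an object such as

     static char big[1 << 20];

   exceeds the threshold and is treated as large data, ending up in
   .ldata/.lbss instead of .data/.bss so the small sections stay within
   reach of 32-bit relocations.  */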
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
	ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for the best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for the best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use the .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
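/* With the format above, a 100000-byte object "big" whose alignment is
   256 bits would be announced as (illustrative output):

	.largecomm	big,100000,32

   since align / BITS_PER_UNIT converts the alignment back to bytes.  */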
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  For example,
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Disable sibcall if we need to generate vzeroupper after
	 the callee returns.  */
      if (TARGET_VZEROUPPER
	  && cfun->machine->callee_return_avx256_p
	  && !cfun->machine->caller_return_avx256_p)
	return false;
    }
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
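/* Example (illustrative): when compiling

     extern int f (int);
     int g (int x) { return f (x); }

   with -m32 -fPIC, the call to the global f cannot become a sibcall,
   because the PLT entry requires %ebx to hold the GOT pointer at the
   call site (the first test above).  */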
/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
   and "sseregparm" calling convention attributes;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
			     tree args,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall, and thiscall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}

      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("regparm and thiscall attributes are not compatible");
	}

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
		   name, REGPARM_MAX);
	  *no_add_attrs = true;
	}

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      /* Do not warn when emulating the MS ABI.  */
      if ((TREE_CODE (*node) != FUNCTION_TYPE
	   && TREE_CODE (*node) != METHOD_TYPE)
	  || ix86_function_type_abi (*node) != MS_ABI)
	warning (OPT_Wattributes, "%qE attribute ignored",
		 name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and stdcall attributes are not compatible");
	}
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and thiscall attributes are not compatible");
	}
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and fastcall attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and thiscall attributes are not compatible");
	}
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("cdecl and thiscall attributes are not compatible");
	}
    }
  else if (is_attribute_p ("thiscall", name))
    {
      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
		 name);
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and thiscall attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and thiscall attributes are not compatible");
	}
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("cdecl and thiscall attributes are not compatible");
	}
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* regparm isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
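/* For example (illustrative), a 32-bit declaration such as

     void __attribute__((stdcall, sseregparm)) f (float);

   yields IX86_CALLCVT_STDCALL | IX86_CALLCVT_SSEREGPARM here, while a
   plain METHOD_TYPE under the 32-bit MS ABI falls through to the final
   IX86_CALLCVT_THISCALL return.  */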
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling the function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = 0; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
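/* As a sketch of the local-function case above: at -O2 a 32-bit
   function such as

     static int sum3 (int a, int b, int c) { return a + b + c; }

   whose address never escapes can have its ABI changed locally and
   receive its arguments in %eax, %edx and %ecx instead of on the
   stack (assuming none of those registers is fixed).  */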
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling the function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}

static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep the aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
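/* Example (illustrative): for a 32-bit

     void __attribute__((stdcall)) f (int, int);

   SIZE is 8, so this returns 8 and the callee pops its own arguments
   with "ret $8"; for a cdecl function it returns 0 and the caller pops
   them instead.  */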
/* Argument support functions.  */

/* Return true when a register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
	return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
	return true;
    }

  /* TODO: The function should depend on the current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as a hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used.  */

int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Returns SYSV_ABI or MS_ABI, dependent on fntype, specifying the
   call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}

static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}
static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (! fndecl)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* Returns SYSV_ABI or MS_ABI, dependent on cfun, specifying the
   call abi used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output the magic byte marker, if the hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of the call abi switching target hook.  Specific to
   FNDECL, the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
   Avoid expensive re-initialization of init_regs each time we switch
   function context, since this is needed only during RTL expansion.  */
static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT &&
      call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
5548 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5549 for a call to a function whose data type is FNTYPE.
5550 For a library call, FNTYPE is 0. */
5553 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5554 tree fntype, /* tree ptr for function decl */
5555 rtx libname, /* SYMBOL_REF of library name or 0 */
5559 struct cgraph_local_info *i;
5562 memset (cum, 0, sizeof (*cum));
5564 /* Initialize for the current callee. */
5567 cfun->machine->callee_pass_avx256_p = false;
5568 cfun->machine->callee_return_avx256_p = false;
5573 i = cgraph_local_info (fndecl);
5574 cum->call_abi = ix86_function_abi (fndecl);
5575 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5580 cum->call_abi = ix86_function_type_abi (fntype);
5582 fnret_type = TREE_TYPE (fntype);
5587 if (TARGET_VZEROUPPER && fnret_type)
5589 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5591 if (function_pass_avx256_p (fnret_value))
5593 /* The return value of this function uses 256bit AVX modes. */
5595 cfun->machine->callee_return_avx256_p = true;
5597 cfun->machine->caller_return_avx256_p = true;
5601 cum->caller = caller;
5603 /* Set up the number of registers to use for passing arguments. */
5605 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5606 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5607 "or subtarget optimization implying it");
5608 cum->nregs = ix86_regparm;
5611 cum->nregs = (cum->call_abi == SYSV_ABI
5612 ? X86_64_REGPARM_MAX
5613 : X86_64_MS_REGPARM_MAX);
5617 cum->sse_nregs = SSE_REGPARM_MAX;
5620 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5621 ? X86_64_SSE_REGPARM_MAX
5622 : X86_64_MS_SSE_REGPARM_MAX);
5626 cum->mmx_nregs = MMX_REGPARM_MAX;
5627 cum->warn_avx = true;
5628 cum->warn_sse = true;
5629 cum->warn_mmx = true;
5631 /* Because types might mismatch between caller and callee, we need to
5632 use the actual type of the function for local calls.
5633 FIXME: cgraph_analyze can be told to actually record if function uses
5634 va_start so for local functions maybe_vaarg can be made aggressive
5636 FIXME: once the type system is fixed, we won't need this code anymore. */
5637 if (i && i->local && i->can_change_signature)
5638 fntype = TREE_TYPE (fndecl);
5639 cum->maybe_vaarg = (fntype
5640 ? (!prototype_p (fntype) || stdarg_p (fntype))
5645 /* If there are variable arguments, then we won't pass anything
5646 in registers in 32-bit mode. */
5647 if (stdarg_p (fntype))
5658 /* Use ecx and edx registers if function has fastcall attribute,
5659 else look for regparm information. */
5662 unsigned int ccvt = ix86_get_callcvt (fntype);
5663 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5666 cum->fastcall = 1; /* Same first register as in fastcall. */
5668 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5674 cum->nregs = ix86_function_regparm (fntype, fndecl);
5677 /* Set up the number of SSE registers used for passing SFmode
5678 and DFmode arguments. Warn for mismatching ABI. */
5679 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
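/* Illustrative effect of the 32-bit conventions set up above (a
   sketch, not part of the original sources):

     int __attribute__ ((fastcall))    f (int a, int b);         A in %ecx, B in %edx
     int __attribute__ ((regparm (3))) g (int a, int b, int c);  %eax, %edx, %ecx

   while a variadic prototype zeroes cum->nregs so that every
   argument is pushed on the stack.  */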
5683 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5684 But in the case of vector types, it is some vector mode.
5686 When we have only some of our vector isa extensions enabled, then there
5687 are some modes for which vector_mode_supported_p is false. For these
5688 modes, the generic vector support in gcc will choose some non-vector mode
5689 in order to implement the type. By computing the natural mode, we'll
5690 select the proper ABI location for the operand and not depend on whatever
5691 the middle-end decides to do with these vector types.
5693 The middle-end can't deal with vector types wider than 16 bytes.  In this
5694 case, we return the original mode and warn about the ABI change if CUM isn't
5697 static enum machine_mode
5698 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5700 enum machine_mode mode = TYPE_MODE (type);
5702 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5704 HOST_WIDE_INT size = int_size_in_bytes (type);
5705 if ((size == 8 || size == 16 || size == 32)
5706 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5707 && TYPE_VECTOR_SUBPARTS (type) > 1)
5709 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5711 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5712 mode = MIN_MODE_VECTOR_FLOAT;
5714 mode = MIN_MODE_VECTOR_INT;
5716 /* Get the mode which has this inner mode and number of units. */
5717 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5718 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5719 && GET_MODE_INNER (mode) == innermode)
5721 if (size == 32 && !TARGET_AVX)
5723 static bool warnedavx;
5730 warning (0, "AVX vector argument without AVX "
5731 "enabled changes the ABI");
5733 return TYPE_MODE (type);
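/* Illustrative trigger for the warning above (a sketch): compiling

     typedef float v8sf __attribute__ ((vector_size (32)));
     v8sf add (v8sf a, v8sf b) { return a + b; }

   without -mavx falls back to TYPE_MODE here and warns once that the
   ABI of the 32-byte vector argument differs from an AVX-enabled
   build.  */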
5746 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5747 this may not agree with the mode that the type system has chosen for the
5748 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5749 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5752 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5757 if (orig_mode != BLKmode)
5758 tmp = gen_rtx_REG (orig_mode, regno);
5761 tmp = gen_rtx_REG (mode, regno);
5762 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5763 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5769 /* x86-64 register passing implementation.  See the x86-64 ABI for details.  The goal
5770 of this code is to classify each 8-byte chunk of the incoming argument by register
5771 class and assign registers accordingly. */
5773 /* Return the union class of CLASS1 and CLASS2.
5774 See the x86-64 PS ABI for details. */
5776 static enum x86_64_reg_class
5777 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5779 /* Rule #1: If both classes are equal, this is the resulting class. */
5780 if (class1 == class2)
5783 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5785 if (class1 == X86_64_NO_CLASS)
5787 if (class2 == X86_64_NO_CLASS)
5790 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5791 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5792 return X86_64_MEMORY_CLASS;
5794 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5795 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5796 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5797 return X86_64_INTEGERSI_CLASS;
5798 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5799 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5800 return X86_64_INTEGER_CLASS;
5802 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5804 if (class1 == X86_64_X87_CLASS
5805 || class1 == X86_64_X87UP_CLASS
5806 || class1 == X86_64_COMPLEX_X87_CLASS
5807 || class2 == X86_64_X87_CLASS
5808 || class2 == X86_64_X87UP_CLASS
5809 || class2 == X86_64_COMPLEX_X87_CLASS)
5810 return X86_64_MEMORY_CLASS;
5812 /* Rule #6: Otherwise class SSE is used. */
5813 return X86_64_SSE_CLASS;
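/* Worked example of the rules above (a sketch): for

     struct s { int i; float f; };

   the single 8-byte chunk classifies I as INTEGERSI and F (at a
   non-zero bit offset) as SSE; rule #4 merges them to INTEGER, so
   the whole struct travels in one general-purpose register.  */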
5816 /* Classify the argument of type TYPE and mode MODE.
5817 CLASSES will be filled by the register class used to pass each word
5818 of the operand. The number of words is returned. In case the parameter
5819 should be passed in memory, 0 is returned. As a special case for zero
5820 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5822 BIT_OFFSET is used internally for handling records; it specifies the
5823 offset in bits modulo 256 to avoid overflow cases.
5825 See the x86-64 PS ABI for details.
5829 classify_argument (enum machine_mode mode, const_tree type,
5830 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5832 HOST_WIDE_INT bytes =
5833 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5834 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5836 /* Variable sized entities are always passed/returned in memory. */
5840 if (mode != VOIDmode
5841 && targetm.calls.must_pass_in_stack (mode, type))
5844 if (type && AGGREGATE_TYPE_P (type))
5848 enum x86_64_reg_class subclasses[MAX_CLASSES];
5850 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5854 for (i = 0; i < words; i++)
5855 classes[i] = X86_64_NO_CLASS;
5857 /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
5858 signal the memory class, so handle them as a special case. */
5861 classes[0] = X86_64_NO_CLASS;
5865 /* Classify each field of the record and merge the classes. */
5866 switch (TREE_CODE (type))
5869 /* And now merge the fields of the structure. */
5870 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5872 if (TREE_CODE (field) == FIELD_DECL)
5876 if (TREE_TYPE (field) == error_mark_node)
5879 /* Bitfields are always classified as integer. Handle them
5880 early, since later code would consider them to be
5881 misaligned integers. */
5882 if (DECL_BIT_FIELD (field))
5884 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5885 i < ((int_bit_position (field) + (bit_offset % 64))
5886 + tree_low_cst (DECL_SIZE (field), 0)
5889 merge_classes (X86_64_INTEGER_CLASS,
5896 type = TREE_TYPE (field);
5898 /* Flexible array member is ignored. */
5899 if (TYPE_MODE (type) == BLKmode
5900 && TREE_CODE (type) == ARRAY_TYPE
5901 && TYPE_SIZE (type) == NULL_TREE
5902 && TYPE_DOMAIN (type) != NULL_TREE
5903 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5908 if (!warned && warn_psabi)
5911 inform (input_location,
5912 "the ABI of passing struct with"
5913 " a flexible array member has"
5914 " changed in GCC 4.4");
5918 num = classify_argument (TYPE_MODE (type), type,
5920 (int_bit_position (field)
5921 + bit_offset) % 256);
5924 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5925 for (i = 0; i < num && (i + pos) < words; i++)
5927 merge_classes (subclasses[i], classes[i + pos]);
5934 /* Arrays are handled as small records. */
5937 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5938 TREE_TYPE (type), subclasses, bit_offset);
5942 /* The partial classes are now full classes. */
5943 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5944 subclasses[0] = X86_64_SSE_CLASS;
5945 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5946 && !((bit_offset % 64) == 0 && bytes == 4))
5947 subclasses[0] = X86_64_INTEGER_CLASS;
5949 for (i = 0; i < words; i++)
5950 classes[i] = subclasses[i % num];
5955 case QUAL_UNION_TYPE:
5956 /* Unions are similar to RECORD_TYPE but offset is always 0.
5958 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5960 if (TREE_CODE (field) == FIELD_DECL)
5964 if (TREE_TYPE (field) == error_mark_node)
5967 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5968 TREE_TYPE (field), subclasses,
5972 for (i = 0; i < num; i++)
5973 classes[i] = merge_classes (subclasses[i], classes[i]);
5984 /* When size > 16 bytes, if the first chunk isn't
5985 X86_64_SSE_CLASS or any of the other chunks isn't
5986 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5988 if (classes[0] != X86_64_SSE_CLASS)
5991 for (i = 1; i < words; i++)
5992 if (classes[i] != X86_64_SSEUP_CLASS)
5996 /* Final merger cleanup. */
5997 for (i = 0; i < words; i++)
5999 /* If one class is MEMORY, everything should be passed in memory. */
6001 if (classes[i] == X86_64_MEMORY_CLASS)
6004 /* The X86_64_SSEUP_CLASS should be always preceded by
6005 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6006 if (classes[i] == X86_64_SSEUP_CLASS
6007 && classes[i - 1] != X86_64_SSE_CLASS
6008 && classes[i - 1] != X86_64_SSEUP_CLASS)
6010 /* The first one should never be X86_64_SSEUP_CLASS. */
6011 gcc_assert (i != 0);
6012 classes[i] = X86_64_SSE_CLASS;
6015 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6016 everything should be passed in memory. */
6017 if (classes[i] == X86_64_X87UP_CLASS
6018 && (classes[i - 1] != X86_64_X87_CLASS))
6022 /* The first one should never be X86_64_X87UP_CLASS. */
6023 gcc_assert (i != 0);
6024 if (!warned && warn_psabi)
6027 inform (input_location,
6028 "the ABI of passing union with long double"
6029 " has changed in GCC 4.4");
6037 /* Compute the alignment needed.  We align all types to their natural boundaries,
6038 with the exception of XFmode, which is aligned to 64bits. */
6039 if (mode != VOIDmode && mode != BLKmode)
6041 int mode_alignment = GET_MODE_BITSIZE (mode);
6044 mode_alignment = 128;
6045 else if (mode == XCmode)
6046 mode_alignment = 256;
6047 if (COMPLEX_MODE_P (mode))
6048 mode_alignment /= 2;
6049 /* Misaligned fields are always returned in memory. */
6050 if (bit_offset % mode_alignment)
6054 /* For V1xx modes, just use the base mode.  */
6055 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6056 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6057 mode = GET_MODE_INNER (mode);
6059 /* Classification of atomic types. */
6064 classes[0] = X86_64_SSE_CLASS;
6067 classes[0] = X86_64_SSE_CLASS;
6068 classes[1] = X86_64_SSEUP_CLASS;
6078 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6082 classes[0] = X86_64_INTEGERSI_CLASS;
6085 else if (size <= 64)
6087 classes[0] = X86_64_INTEGER_CLASS;
6090 else if (size <= 64+32)
6092 classes[0] = X86_64_INTEGER_CLASS;
6093 classes[1] = X86_64_INTEGERSI_CLASS;
6096 else if (size <= 64+64)
6098 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6106 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6110 /* OImode shouldn't be used directly. */
6115 if (!(bit_offset % 64))
6116 classes[0] = X86_64_SSESF_CLASS;
6118 classes[0] = X86_64_SSE_CLASS;
6121 classes[0] = X86_64_SSEDF_CLASS;
6124 classes[0] = X86_64_X87_CLASS;
6125 classes[1] = X86_64_X87UP_CLASS;
6128 classes[0] = X86_64_SSE_CLASS;
6129 classes[1] = X86_64_SSEUP_CLASS;
6132 classes[0] = X86_64_SSE_CLASS;
6133 if (!(bit_offset % 64))
6139 if (!warned && warn_psabi)
6142 inform (input_location,
6143 "the ABI of passing structure with complex float"
6144 " member has changed in GCC 4.4");
6146 classes[1] = X86_64_SSESF_CLASS;
6150 classes[0] = X86_64_SSEDF_CLASS;
6151 classes[1] = X86_64_SSEDF_CLASS;
6154 classes[0] = X86_64_COMPLEX_X87_CLASS;
6157 /* These modes are larger than 16 bytes.  */
6165 classes[0] = X86_64_SSE_CLASS;
6166 classes[1] = X86_64_SSEUP_CLASS;
6167 classes[2] = X86_64_SSEUP_CLASS;
6168 classes[3] = X86_64_SSEUP_CLASS;
6176 classes[0] = X86_64_SSE_CLASS;
6177 classes[1] = X86_64_SSEUP_CLASS;
6185 classes[0] = X86_64_SSE_CLASS;
6191 gcc_assert (VECTOR_MODE_P (mode));
6196 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6198 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6199 classes[0] = X86_64_INTEGERSI_CLASS;
6201 classes[0] = X86_64_INTEGER_CLASS;
6202 classes[1] = X86_64_INTEGER_CLASS;
6203 return 1 + (bytes > 8);
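/* Worked examples for the classification above (sketches):

     struct pt { double x; double y; };   classes = { SSEDF, SSEDF }
     struct iv { int a, b, c, d; };       classes = { INTEGER, INTEGER }

   so PT is passed in two SSE registers (%xmm0/%xmm1 as the first
   argument) while IV occupies two general-purpose registers.  */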
6207 /* Examine the argument and set the number of registers required in each
6208 class.  Return 0 iff the parameter should be passed in memory. */
6210 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6211 int *int_nregs, int *sse_nregs)
6213 enum x86_64_reg_class regclass[MAX_CLASSES];
6214 int n = classify_argument (mode, type, regclass, 0);
6220 for (n--; n >= 0; n--)
6221 switch (regclass[n])
6223 case X86_64_INTEGER_CLASS:
6224 case X86_64_INTEGERSI_CLASS:
6227 case X86_64_SSE_CLASS:
6228 case X86_64_SSESF_CLASS:
6229 case X86_64_SSEDF_CLASS:
6232 case X86_64_NO_CLASS:
6233 case X86_64_SSEUP_CLASS:
6235 case X86_64_X87_CLASS:
6236 case X86_64_X87UP_CLASS:
6240 case X86_64_COMPLEX_X87_CLASS:
6241 return in_return ? 2 : 0;
6242 case X86_64_MEMORY_CLASS:
6248 /* Construct container for the argument used by GCC interface. See
6249 FUNCTION_ARG for the detailed description. */
6252 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6253 const_tree type, int in_return, int nintregs, int nsseregs,
6254 const int *intreg, int sse_regno)
6256 /* The following variables hold the static issued_error state. */
6257 static bool issued_sse_arg_error;
6258 static bool issued_sse_ret_error;
6259 static bool issued_x87_ret_error;
6261 enum machine_mode tmpmode;
6263 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6264 enum x86_64_reg_class regclass[MAX_CLASSES];
6268 int needed_sseregs, needed_intregs;
6269 rtx exp[MAX_CLASSES];
6272 n = classify_argument (mode, type, regclass, 0);
6275 if (!examine_argument (mode, type, in_return, &needed_intregs,
6278 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6281 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6282 some less clueful developer tries to use floating-point anyway. */
6283 if (needed_sseregs && !TARGET_SSE)
6287 if (!issued_sse_ret_error)
6289 error ("SSE register return with SSE disabled");
6290 issued_sse_ret_error = true;
6293 else if (!issued_sse_arg_error)
6295 error ("SSE register argument with SSE disabled");
6296 issued_sse_arg_error = true;
6301 /* Likewise, error if the ABI requires us to return values in the
6302 x87 registers and the user specified -mno-80387. */
6303 if (!TARGET_80387 && in_return)
6304 for (i = 0; i < n; i++)
6305 if (regclass[i] == X86_64_X87_CLASS
6306 || regclass[i] == X86_64_X87UP_CLASS
6307 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6309 if (!issued_x87_ret_error)
6311 error ("x87 register return with x87 disabled");
6312 issued_x87_ret_error = true;
6317 /* First construct simple cases. Avoid SCmode, since we want to use
6318 single register to pass this type. */
6319 if (n == 1 && mode != SCmode)
6320 switch (regclass[0])
6322 case X86_64_INTEGER_CLASS:
6323 case X86_64_INTEGERSI_CLASS:
6324 return gen_rtx_REG (mode, intreg[0]);
6325 case X86_64_SSE_CLASS:
6326 case X86_64_SSESF_CLASS:
6327 case X86_64_SSEDF_CLASS:
6328 if (mode != BLKmode)
6329 return gen_reg_or_parallel (mode, orig_mode,
6330 SSE_REGNO (sse_regno));
6332 case X86_64_X87_CLASS:
6333 case X86_64_COMPLEX_X87_CLASS:
6334 return gen_rtx_REG (mode, FIRST_STACK_REG);
6335 case X86_64_NO_CLASS:
6336 /* Zero-sized array, struct or class. */
6341 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6342 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6343 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6345 && regclass[0] == X86_64_SSE_CLASS
6346 && regclass[1] == X86_64_SSEUP_CLASS
6347 && regclass[2] == X86_64_SSEUP_CLASS
6348 && regclass[3] == X86_64_SSEUP_CLASS
6350 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6353 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6354 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6355 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6356 && regclass[1] == X86_64_INTEGER_CLASS
6357 && (mode == CDImode || mode == TImode || mode == TFmode)
6358 && intreg[0] + 1 == intreg[1])
6359 return gen_rtx_REG (mode, intreg[0]);
6361 /* Otherwise figure out the entries of the PARALLEL. */
6362 for (i = 0; i < n; i++)
6366 switch (regclass[i])
6368 case X86_64_NO_CLASS:
6370 case X86_64_INTEGER_CLASS:
6371 case X86_64_INTEGERSI_CLASS:
6372 /* Merge TImodes on aligned occasions here too. */
6373 if (i * 8 + 8 > bytes)
6374 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6375 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6379 /* We've requested 24 bytes for which we don't have a mode.  Use DImode. */
6380 if (tmpmode == BLKmode)
6382 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6383 gen_rtx_REG (tmpmode, *intreg),
6387 case X86_64_SSESF_CLASS:
6388 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6389 gen_rtx_REG (SFmode,
6390 SSE_REGNO (sse_regno)),
6394 case X86_64_SSEDF_CLASS:
6395 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6396 gen_rtx_REG (DFmode,
6397 SSE_REGNO (sse_regno)),
6401 case X86_64_SSE_CLASS:
6409 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6419 && regclass[1] == X86_64_SSEUP_CLASS
6420 && regclass[2] == X86_64_SSEUP_CLASS
6421 && regclass[3] == X86_64_SSEUP_CLASS);
6428 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6429 gen_rtx_REG (tmpmode,
6430 SSE_REGNO (sse_regno)),
6439 /* Empty aligned struct, union or class. */
6443 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6444 for (i = 0; i < nexps; i++)
6445 XVECEXP (ret, 0, i) = exp [i];
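/* Illustrative result (a sketch): for struct { long l; double d; }
   the classes are { INTEGER, SSEDF }, so the loop above builds a
   PARALLEL pairing a DImode general-purpose register at offset 0
   with a DFmode SSE register at offset 8, letting the middle end
   move each half independently.  */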
6449 /* Update the data in CUM to advance over an argument of mode MODE
6450 and data type TYPE. (TYPE is null for libcalls where that information
6451 may not be available.) */
6454 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6455 const_tree type, HOST_WIDE_INT bytes,
6456 HOST_WIDE_INT words)
6472 cum->words += words;
6473 cum->nregs -= words;
6474 cum->regno += words;
6476 if (cum->nregs <= 0)
6484 /* OImode shouldn't be used directly. */
6488 if (cum->float_in_sse < 2)
6491 if (cum->float_in_sse < 1)
6508 if (!type || !AGGREGATE_TYPE_P (type))
6510 cum->sse_words += words;
6511 cum->sse_nregs -= 1;
6512 cum->sse_regno += 1;
6513 if (cum->sse_nregs <= 0)
6527 if (!type || !AGGREGATE_TYPE_P (type))
6529 cum->mmx_words += words;
6530 cum->mmx_nregs -= 1;
6531 cum->mmx_regno += 1;
6532 if (cum->mmx_nregs <= 0)
6543 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6544 const_tree type, HOST_WIDE_INT words, bool named)
6546 int int_nregs, sse_nregs;
6548 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6549 if (!named && VALID_AVX256_REG_MODE (mode))
6552 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6553 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6555 cum->nregs -= int_nregs;
6556 cum->sse_nregs -= sse_nregs;
6557 cum->regno += int_nregs;
6558 cum->sse_regno += sse_nregs;
6562 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6563 cum->words = (cum->words + align - 1) & ~(align - 1);
6564 cum->words += words;
6569 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6570 HOST_WIDE_INT words)
6572 /* Otherwise, this should be passed indirectly. */
6573 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6575 cum->words += words;
6583 /* Update the data in CUM to advance over an argument of mode MODE and
6584 data type TYPE. (TYPE is null for libcalls where that information
6585 may not be available.) */
6588 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6589 const_tree type, bool named)
6591 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6592 HOST_WIDE_INT bytes, words;
6594 if (mode == BLKmode)
6595 bytes = int_size_in_bytes (type);
6597 bytes = GET_MODE_SIZE (mode);
6598 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6601 mode = type_natural_mode (type, NULL);
6603 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6604 function_arg_advance_ms_64 (cum, bytes, words);
6605 else if (TARGET_64BIT)
6606 function_arg_advance_64 (cum, mode, type, words, named);
6608 function_arg_advance_32 (cum, mode, type, bytes, words);
6611 /* Define where to put the arguments to a function.
6612 Value is zero to push the argument on the stack,
6613 or a hard register in which to store the argument.
6615 MODE is the argument's machine mode.
6616 TYPE is the data type of the argument (as a tree).
6617 This is null for libcalls where that information may
6619 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6620 the preceding args and about the function being called.
6621 NAMED is nonzero if this argument is a named parameter
6622 (otherwise it is an extra parameter matching an ellipsis). */
6625 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6626 enum machine_mode orig_mode, const_tree type,
6627 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6629 static bool warnedsse, warnedmmx;
6631 /* Avoid the AL settings for the Unix64 ABI. */
6632 if (mode == VOIDmode)
6648 if (words <= cum->nregs)
6650 int regno = cum->regno;
6652 /* Fastcall allocates the first two DWORD (SImode) or
6653 smaller arguments to ECX and EDX if it isn't an
6659 || (type && AGGREGATE_TYPE_P (type)))
6662 /* ECX, not EAX, is the first allocated register. */
6663 if (regno == AX_REG)
6666 return gen_rtx_REG (mode, regno);
6671 if (cum->float_in_sse < 2)
6674 if (cum->float_in_sse < 1)
6678 /* In 32bit, we pass TImode in xmm registers. */
6685 if (!type || !AGGREGATE_TYPE_P (type))
6687 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6690 warning (0, "SSE vector argument without SSE enabled "
6694 return gen_reg_or_parallel (mode, orig_mode,
6695 cum->sse_regno + FIRST_SSE_REG);
6700 /* OImode shouldn't be used directly. */
6709 if (!type || !AGGREGATE_TYPE_P (type))
6712 return gen_reg_or_parallel (mode, orig_mode,
6713 cum->sse_regno + FIRST_SSE_REG);
6723 if (!type || !AGGREGATE_TYPE_P (type))
6725 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6728 warning (0, "MMX vector argument without MMX enabled "
6732 return gen_reg_or_parallel (mode, orig_mode,
6733 cum->mmx_regno + FIRST_MMX_REG);
6742 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6743 enum machine_mode orig_mode, const_tree type, bool named)
6745 /* Handle a hidden AL argument containing number of registers
6746 for varargs x86-64 functions. */
6747 if (mode == VOIDmode)
6748 return GEN_INT (cum->maybe_vaarg
6749 ? (cum->sse_nregs < 0
6750 ? X86_64_SSE_REGPARM_MAX
6765 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6771 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6773 &x86_64_int_parameter_registers [cum->regno],
6778 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6779 enum machine_mode orig_mode, bool named,
6780 HOST_WIDE_INT bytes)
6784 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6785 We use the value -2 to specify that the current function call is MSABI. */
6786 if (mode == VOIDmode)
6787 return GEN_INT (-2);
6789 /* If we've run out of registers, it goes on the stack. */
6790 if (cum->nregs == 0)
6793 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6795 /* Only floating point modes are passed in anything but integer regs. */
6796 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6799 regno = cum->regno + FIRST_SSE_REG;
6804 /* Unnamed floating parameters are passed in both the
6805 SSE and integer registers. */
6806 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6807 t2 = gen_rtx_REG (mode, regno);
6808 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6809 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6810 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6813 /* Handle aggregate types passed in registers. */
6814 if (orig_mode == BLKmode)
6816 if (bytes > 0 && bytes <= 8)
6817 mode = (bytes > 4 ? DImode : SImode);
6818 if (mode == BLKmode)
6822 return gen_reg_or_parallel (mode, orig_mode, regno);
6825 /* Return where to put the arguments to a function.
6826 Return zero to push the argument on the stack, or a hard register in which to store it.
6828 MODE is the argument's machine mode. TYPE is the data type of the
6829 argument. It is null for libcalls where that information may not be
6830 available. CUM gives information about the preceding args and about
6831 the function being called. NAMED is nonzero if this argument is a
6832 named parameter (otherwise it is an extra parameter matching an
6836 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6837 const_tree type, bool named)
6839 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6840 enum machine_mode mode = omode;
6841 HOST_WIDE_INT bytes, words;
6844 if (mode == BLKmode)
6845 bytes = int_size_in_bytes (type);
6847 bytes = GET_MODE_SIZE (mode);
6848 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6850 /* To simplify the code below, represent vector types with a vector mode
6851 even if MMX/SSE are not active. */
6852 if (type && TREE_CODE (type) == VECTOR_TYPE)
6853 mode = type_natural_mode (type, cum);
6855 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6856 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6857 else if (TARGET_64BIT)
6858 arg = function_arg_64 (cum, mode, omode, type, named);
6860 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6862 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6864 /* This argument uses 256bit AVX modes. */
6866 cfun->machine->callee_pass_avx256_p = true;
6868 cfun->machine->caller_pass_avx256_p = true;
6874 /* A C expression that indicates when an argument must be passed by
6875 reference. If nonzero for an argument, a copy of that argument is
6876 made in memory and a pointer to the argument is passed instead of
6877 the argument itself. The pointer is passed in whatever way is
6878 appropriate for passing a pointer to that type. */
6881 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6882 enum machine_mode mode ATTRIBUTE_UNUSED,
6883 const_tree type, bool named ATTRIBUTE_UNUSED)
6885 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6887 /* See Windows x64 Software Convention. */
6888 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6890 int msize = (int) GET_MODE_SIZE (mode);
6893 /* Arrays are passed by reference. */
6894 if (TREE_CODE (type) == ARRAY_TYPE)
6897 if (AGGREGATE_TYPE_P (type))
6899 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6900 are passed by reference. */
6901 msize = int_size_in_bytes (type);
6905 /* __m128 is passed by reference. */
6907 case 1: case 2: case 4: case 8:
6913 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6919 /* Return true when TYPE should be 128bit aligned for 32bit argument
6920 passing ABI. XXX: This function is obsolete and is only used for
6921 checking psABI compatibility with previous versions of GCC. */
6924 ix86_compat_aligned_value_p (const_tree type)
6926 enum machine_mode mode = TYPE_MODE (type);
6927 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6931 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6933 if (TYPE_ALIGN (type) < 128)
6936 if (AGGREGATE_TYPE_P (type))
6938 /* Walk the aggregates recursively. */
6939 switch (TREE_CODE (type))
6943 case QUAL_UNION_TYPE:
6947 /* Walk all the structure fields. */
6948 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6950 if (TREE_CODE (field) == FIELD_DECL
6951 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6958 /* Just in case some language passes arrays by value. */
6959 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6970 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6971 XXX: This function is obsolete and is only used for checking psABI
6972 compatibility with previous versions of GCC. */
6975 ix86_compat_function_arg_boundary (enum machine_mode mode,
6976 const_tree type, unsigned int align)
6978 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6979 natural boundaries. */
6980 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6982 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6983 make an exception for SSE modes since these require 128bit
6986 The handling here differs from field_alignment. ICC aligns MMX
6987 arguments to 4 byte boundaries, while structure fields are aligned
6988 to 8 byte boundaries. */
6991 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6992 align = PARM_BOUNDARY;
6996 if (!ix86_compat_aligned_value_p (type))
6997 align = PARM_BOUNDARY;
7000 if (align > BIGGEST_ALIGNMENT)
7001 align = BIGGEST_ALIGNMENT;
7005 /* Return true when TYPE should be 128bit aligned for 32bit argument
7009 ix86_contains_aligned_value_p (const_tree type)
7011 enum machine_mode mode = TYPE_MODE (type);
7013 if (mode == XFmode || mode == XCmode)
7016 if (TYPE_ALIGN (type) < 128)
7019 if (AGGREGATE_TYPE_P (type))
7021 /* Walk the aggregates recursively. */
7022 switch (TREE_CODE (type))
7026 case QUAL_UNION_TYPE:
7030 /* Walk all the structure fields. */
7031 for (field = TYPE_FIELDS (type);
7033 field = DECL_CHAIN (field))
7035 if (TREE_CODE (field) == FIELD_DECL
7036 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7043 /* Just in case some language passes arrays by value. */
7044 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7053 return TYPE_ALIGN (type) >= 128;
7058 /* Gives the alignment boundary, in bits, of an argument with the
7059 specified mode and type. */
7062 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7067 /* Since the main variant type is used for the call, convert
7068 the type to its main variant. */
7069 type = TYPE_MAIN_VARIANT (type);
7070 align = TYPE_ALIGN (type);
7073 align = GET_MODE_ALIGNMENT (mode);
7074 if (align < PARM_BOUNDARY)
7075 align = PARM_BOUNDARY;
7079 unsigned int saved_align = align;
7083 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7086 if (mode == XFmode || mode == XCmode)
7087 align = PARM_BOUNDARY;
7089 else if (!ix86_contains_aligned_value_p (type))
7090 align = PARM_BOUNDARY;
7093 align = PARM_BOUNDARY;
7098 && align != ix86_compat_function_arg_boundary (mode, type,
7102 inform (input_location,
7103 "The ABI for passing parameters with %d-byte"
7104 " alignment has changed in GCC 4.6",
7105 align / BITS_PER_UNIT);
7112 /* Return true if N is a possible register number for a function value. */
7115 ix86_function_value_regno_p (const unsigned int regno)
7122 case FIRST_FLOAT_REG:
7123 /* TODO: The function should depend on current function ABI but
7124 builtins.c would need updating then. Therefore we use the
7126 if (TARGET_64BIT && ix86_abi == MS_ABI)
7128 return TARGET_FLOAT_RETURNS_IN_80387;
7134 if (TARGET_MACHO || TARGET_64BIT)
7142 /* Define how to find the value returned by a function.
7143 VALTYPE is the data type of the value (as a tree).
7144 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7145 otherwise, FUNC is 0. */
7148 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7149 const_tree fntype, const_tree fn)
7153 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7154 we normally prevent this case when mmx is not available. However
7155 some ABIs may require the result to be returned like DImode. */
7156 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7157 regno = FIRST_MMX_REG;
7159 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7160 we prevent this case when sse is not available. However some ABIs
7161 may require the result to be returned like integer TImode. */
7162 else if (mode == TImode
7163 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7164 regno = FIRST_SSE_REG;
7166 /* 32-byte vector modes in %ymm0. */
7167 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7168 regno = FIRST_SSE_REG;
7170 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7171 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7172 regno = FIRST_FLOAT_REG;
7174 /* Most things go in %eax. */
7177 /* Override FP return register with %xmm0 for local functions when
7178 SSE math is enabled or for functions with sseregparm attribute. */
7179 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7181 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7182 if ((sse_level >= 1 && mode == SFmode)
7183 || (sse_level == 2 && mode == DFmode))
7184 regno = FIRST_SSE_REG;
7187 /* OImode shouldn't be used directly. */
7188 gcc_assert (mode != OImode);
7190 return gen_rtx_REG (orig_mode, regno);
7194 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7199 /* Handle libcalls, which don't provide a type node. */
7200 if (valtype == NULL)
7214 regno = FIRST_SSE_REG;
7218 regno = FIRST_FLOAT_REG;
7226 return gen_rtx_REG (mode, regno);
7228 else if (POINTER_TYPE_P (valtype))
7230 /* Pointers are always returned in Pmode. */
7234 ret = construct_container (mode, orig_mode, valtype, 1,
7235 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7236 x86_64_int_return_registers, 0);
7238 /* For zero-sized structures, construct_container returns NULL, but we
7239 need to keep the rest of the compiler happy by returning a meaningful value. */
7241 ret = gen_rtx_REG (orig_mode, AX_REG);
7247 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7249 unsigned int regno = AX_REG;
7253 switch (GET_MODE_SIZE (mode))
7256 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7257 && !COMPLEX_MODE_P (mode))
7258 regno = FIRST_SSE_REG;
7262 if (mode == SFmode || mode == DFmode)
7263 regno = FIRST_SSE_REG;
7269 return gen_rtx_REG (orig_mode, regno);
7273 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7274 enum machine_mode orig_mode, enum machine_mode mode)
7276 const_tree fn, fntype;
7279 if (fntype_or_decl && DECL_P (fntype_or_decl))
7280 fn = fntype_or_decl;
7281 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7283 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7284 return function_value_ms_64 (orig_mode, mode);
7285 else if (TARGET_64BIT)
7286 return function_value_64 (orig_mode, mode, valtype);
7288 return function_value_32 (orig_mode, mode, fntype, fn);
7292 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7293 bool outgoing ATTRIBUTE_UNUSED)
7295 enum machine_mode mode, orig_mode;
7297 orig_mode = TYPE_MODE (valtype);
7298 mode = type_natural_mode (valtype, NULL);
7299 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7302 /* Pointer function arguments and return values are promoted to Pmode. */
7304 static enum machine_mode
7305 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7306 int *punsignedp, const_tree fntype,
7309 if (type != NULL_TREE && POINTER_TYPE_P (type))
7311 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7314 return default_promote_function_mode (type, mode, punsignedp, fntype,
7319 ix86_libcall_value (enum machine_mode mode)
7321 return ix86_function_value_1 (NULL, NULL, mode, mode);
7324 /* Return true iff type is returned in memory. */
7326 static bool ATTRIBUTE_UNUSED
7327 return_in_memory_32 (const_tree type, enum machine_mode mode)
7331 if (mode == BLKmode)
7334 size = int_size_in_bytes (type);
7336 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7339 if (VECTOR_MODE_P (mode) || mode == TImode)
7341 /* User-created vectors small enough to fit in EAX. */
7345 /* MMX/3dNow values are returned in MM0,
7346 except when it doesn't exist or the ABI prescribes otherwise. */
7348 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7350 /* SSE values are returned in XMM0, except when it doesn't exist. */
7354 /* AVX values are returned in YMM0, except when it doesn't exist. */
7365 /* OImode shouldn't be used directly. */
7366 gcc_assert (mode != OImode);
7371 static bool ATTRIBUTE_UNUSED
7372 return_in_memory_64 (const_tree type, enum machine_mode mode)
7374 int needed_intregs, needed_sseregs;
7375 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7378 static bool ATTRIBUTE_UNUSED
7379 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7381 HOST_WIDE_INT size = int_size_in_bytes (type);
7383 /* __m128 is returned in xmm0. */
7384 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7385 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7388 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7389 return size != 1 && size != 2 && size != 4 && size != 8;
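/* Illustrative consequences of the tests above (a sketch): under the
   Microsoft x64 convention

     struct s8  { long long l; };    returned in %rax (size 8)
     __m128 v;                       returned in %xmm0 (size 16)
     struct s12 { int a, b, c; };    returned via hidden pointer

   since 12 is not one of the permitted sizes 1, 2, 4 or 8.  */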
7393 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7395 #ifdef SUBTARGET_RETURN_IN_MEMORY
7396 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7398 const enum machine_mode mode = type_natural_mode (type, NULL);
7402 if (ix86_function_type_abi (fntype) == MS_ABI)
7403 return return_in_memory_ms_64 (type, mode);
7405 return return_in_memory_64 (type, mode);
7408 return return_in_memory_32 (type, mode);
7412 /* When returning SSE vector types, we have a choice of either
7413 (1) being ABI-incompatible with a -march switch, or
7414 (2) generating an error.
7415 Given no good solution, I think the safest thing is one warning.
7416 The user won't be able to use -Werror, but....
7418 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7419 called in response to actually generating a caller or callee that
7420 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7421 via aggregate_value_p for general type probing from tree-ssa. */
7424 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7426 static bool warnedsse, warnedmmx;
7428 if (!TARGET_64BIT && type)
7430 /* Look at the return type of the function, not the function type. */
7431 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7433 if (!TARGET_SSE && !warnedsse)
7436 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7439 warning (0, "SSE vector return without SSE enabled "
7444 if (!TARGET_MMX && !warnedmmx)
7446 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7449 warning (0, "MMX vector return without MMX enabled "
7459 /* Create the va_list data type. */
7461 /* Returns the calling-convention-specific va_list data type.
7462 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7465 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7467 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7469 /* For i386 we use a plain pointer to the argument area. */
7470 if (!TARGET_64BIT || abi == MS_ABI)
7471 return build_pointer_type (char_type_node);
7473 record = lang_hooks.types.make_type (RECORD_TYPE);
7474 type_decl = build_decl (BUILTINS_LOCATION,
7475 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7477 f_gpr = build_decl (BUILTINS_LOCATION,
7478 FIELD_DECL, get_identifier ("gp_offset"),
7479 unsigned_type_node);
7480 f_fpr = build_decl (BUILTINS_LOCATION,
7481 FIELD_DECL, get_identifier ("fp_offset"),
7482 unsigned_type_node);
7483 f_ovf = build_decl (BUILTINS_LOCATION,
7484 FIELD_DECL, get_identifier ("overflow_arg_area"),
7486 f_sav = build_decl (BUILTINS_LOCATION,
7487 FIELD_DECL, get_identifier ("reg_save_area"),
7490 va_list_gpr_counter_field = f_gpr;
7491 va_list_fpr_counter_field = f_fpr;
7493 DECL_FIELD_CONTEXT (f_gpr) = record;
7494 DECL_FIELD_CONTEXT (f_fpr) = record;
7495 DECL_FIELD_CONTEXT (f_ovf) = record;
7496 DECL_FIELD_CONTEXT (f_sav) = record;
7498 TYPE_STUB_DECL (record) = type_decl;
7499 TYPE_NAME (record) = type_decl;
7500 TYPE_FIELDS (record) = f_gpr;
7501 DECL_CHAIN (f_gpr) = f_fpr;
7502 DECL_CHAIN (f_fpr) = f_ovf;
7503 DECL_CHAIN (f_ovf) = f_sav;
7505 layout_type (record);
7507 /* The correct type is an array type of one element. */
7508 return build_array_type (record, build_index_type (size_zero_node));
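/* The record built above corresponds to the familiar SysV x86-64
   va_list; roughly (a sketch of the equivalent C declaration):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   including the one-element array type returned last.  */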
7511 /* Set up the builtin va_list data type and, for 64-bit, the additional
7512 calling-convention-specific va_list data types. */
7515 ix86_build_builtin_va_list (void)
7517 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7519 /* Initialize the ABI-specific va_list builtin types. */
7523 if (ix86_abi == MS_ABI)
7525 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7526 if (TREE_CODE (t) != RECORD_TYPE)
7527 t = build_variant_type_copy (t);
7528 sysv_va_list_type_node = t;
7533 if (TREE_CODE (t) != RECORD_TYPE)
7534 t = build_variant_type_copy (t);
7535 sysv_va_list_type_node = t;
7537 if (ix86_abi != MS_ABI)
7539 t = ix86_build_builtin_va_list_abi (MS_ABI);
7540 if (TREE_CODE (t) != RECORD_TYPE)
7541 t = build_variant_type_copy (t);
7542 ms_va_list_type_node = t;
7547 if (TREE_CODE (t) != RECORD_TYPE)
7548 t = build_variant_type_copy (t);
7549 ms_va_list_type_node = t;
7556 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7559 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7565 /* GPR size of varargs save area. */
7566 if (cfun->va_list_gpr_size)
7567 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7569 ix86_varargs_gpr_size = 0;
7571 /* FPR size of varargs save area. We don't need it if we don't pass
7572 anything in SSE registers. */
7573 if (TARGET_SSE && cfun->va_list_fpr_size)
7574 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7576 ix86_varargs_fpr_size = 0;
7578 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7581 save_area = frame_pointer_rtx;
7582 set = get_varargs_alias_set ();
7584 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7585 if (max > X86_64_REGPARM_MAX)
7586 max = X86_64_REGPARM_MAX;
7588 for (i = cum->regno; i < max; i++)
7590 mem = gen_rtx_MEM (Pmode,
7591 plus_constant (save_area, i * UNITS_PER_WORD));
7592 MEM_NOTRAP_P (mem) = 1;
7593 set_mem_alias_set (mem, set);
7594 emit_move_insn (mem, gen_rtx_REG (Pmode,
7595 x86_64_int_parameter_registers[i]));
7598 if (ix86_varargs_fpr_size)
7600 enum machine_mode smode;
7603 /* Now emit code to save the SSE registers.  The AX parameter contains the
7604 number of SSE parameter registers used to call this function, though all we
7605 actually check here is the zero/non-zero status. */
7607 label = gen_label_rtx ();
7608 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7609 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7612 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7613 we used movdqa (i.e. TImode) instead? Perhaps even better would
7614 be if we could determine the real mode of the data, via a hook
7615 into pass_stdarg. Ignore all that for now. */
7617 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7618 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7620 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7621 if (max > X86_64_SSE_REGPARM_MAX)
7622 max = X86_64_SSE_REGPARM_MAX;
7624 for (i = cum->sse_regno; i < max; ++i)
7626 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7627 mem = gen_rtx_MEM (smode, mem);
7628 MEM_NOTRAP_P (mem) = 1;
7629 set_mem_alias_set (mem, set);
7630 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7632 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
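/* After the two save loops above the register save area holds, for
   the SysV ABI, 6 * 8 = 48 bytes of general-purpose argument
   registers followed by up to 8 * 16 = 128 bytes of SSE registers;
   gp_offset and fp_offset in the va_list index directly into this
   block.  */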
7640 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7642 alias_set_type set = get_varargs_alias_set ();
7645 /* Reset to zero, as there might be a sysv vaarg used
7647 ix86_varargs_gpr_size = 0;
7648 ix86_varargs_fpr_size = 0;
7650 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7654 mem = gen_rtx_MEM (Pmode,
7655 plus_constant (virtual_incoming_args_rtx,
7656 i * UNITS_PER_WORD));
7657 MEM_NOTRAP_P (mem) = 1;
7658 set_mem_alias_set (mem, set);
7660 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7661 emit_move_insn (mem, reg);
7666 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7667 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7670 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7671 CUMULATIVE_ARGS next_cum;
7674 /* This argument doesn't appear to be used anymore. Which is good,
7675 because the old code here didn't suppress rtl generation. */
7676 gcc_assert (!no_rtl);
7681 fntype = TREE_TYPE (current_function_decl);
7683 /* For varargs, we do not want to skip the dummy va_dcl argument.
7684 For stdargs, we do want to skip the last named argument. */
7686 if (stdarg_p (fntype))
7687 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7690 if (cum->call_abi == MS_ABI)
7691 setup_incoming_varargs_ms_64 (&next_cum);
7693 setup_incoming_varargs_64 (&next_cum);
7696 /* Check whether TYPE is a va_list of the char * kind. */
7699 is_va_list_char_pointer (tree type)
7703 /* For 32-bit it is always true. */
7706 canonic = ix86_canonical_va_list_type (type);
7707 return (canonic == ms_va_list_type_node
7708 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7711 /* Implement va_start. */
7714 ix86_va_start (tree valist, rtx nextarg)
7716 HOST_WIDE_INT words, n_gpr, n_fpr;
7717 tree f_gpr, f_fpr, f_ovf, f_sav;
7718 tree gpr, fpr, ovf, sav, t;
7722 if (flag_split_stack
7723 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7725 unsigned int scratch_regno;
7727 /* When we are splitting the stack, we can't refer to the stack
7728 arguments using internal_arg_pointer, because they may be on
7729 the old stack. The split stack prologue will arrange to
7730 leave a pointer to the old stack arguments in a scratch
7731 register, which we here copy to a pseudo-register. The split
7732 stack prologue can't set the pseudo-register directly because
7733 it (the prologue) runs before any registers have been saved. */
7735 scratch_regno = split_stack_prologue_scratch_regno ();
7736 if (scratch_regno != INVALID_REGNUM)
7740 reg = gen_reg_rtx (Pmode);
7741 cfun->machine->split_stack_varargs_pointer = reg;
7744 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7748 push_topmost_sequence ();
7749 emit_insn_after (seq, entry_of_function ());
7750 pop_topmost_sequence ();
7754 /* Only 64bit targets need something special. */
7755 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7757 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7758 std_expand_builtin_va_start (valist, nextarg);
7763 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7764 next = expand_binop (ptr_mode, add_optab,
7765 cfun->machine->split_stack_varargs_pointer,
7766 crtl->args.arg_offset_rtx,
7767 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7768 convert_move (va_r, next, 0);
7773 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7774 f_fpr = DECL_CHAIN (f_gpr);
7775 f_ovf = DECL_CHAIN (f_fpr);
7776 f_sav = DECL_CHAIN (f_ovf);
7778 valist = build_simple_mem_ref (valist);
7779 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7780 /* The following should be folded into the MEM_REF offset. */
7781 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7783 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7785 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7787 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7790 /* Count number of gp and fp argument registers used. */
7791 words = crtl->args.info.words;
7792 n_gpr = crtl->args.info.regno;
7793 n_fpr = crtl->args.info.sse_regno;
7795 if (cfun->va_list_gpr_size)
7797 type = TREE_TYPE (gpr);
7798 t = build2 (MODIFY_EXPR, type,
7799 gpr, build_int_cst (type, n_gpr * 8));
7800 TREE_SIDE_EFFECTS (t) = 1;
7801 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7804 if (TARGET_SSE && cfun->va_list_fpr_size)
7806 type = TREE_TYPE (fpr);
7807 t = build2 (MODIFY_EXPR, type, fpr,
7808 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7809 TREE_SIDE_EFFECTS (t) = 1;
7810 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7813 /* Find the overflow area. */
7814 type = TREE_TYPE (ovf);
7815 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7816 ovf_rtx = crtl->args.internal_arg_pointer;
7818 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7819 t = make_tree (type, ovf_rtx);
7821 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7822 t = build2 (MODIFY_EXPR, type, ovf, t);
7823 TREE_SIDE_EFFECTS (t) = 1;
7824 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7826 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7828 /* Find the register save area.
7829 The function prologue saves it right above the stack frame. */
7830 type = TREE_TYPE (sav);
7831 t = make_tree (type, frame_pointer_rtx);
7832 if (!ix86_varargs_gpr_size)
7833 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7834 t = build2 (MODIFY_EXPR, type, sav, t);
7835 TREE_SIDE_EFFECTS (t) = 1;
7836 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
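/* Net effect (a sketch): for

     int sum (int n, ...)

   with one named integer argument and no SSE use, va_start leaves
   gp_offset = 8 and fp_offset = 48, points overflow_arg_area just
   past the named stack words, and points reg_save_area at the block
   spilled by the prologue.  */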
7840 /* Implement va_arg. */
7843 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7846 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7847 tree f_gpr, f_fpr, f_ovf, f_sav;
7848 tree gpr, fpr, ovf, sav, t;
7850 tree lab_false, lab_over = NULL_TREE;
7855 enum machine_mode nat_mode;
7856 unsigned int arg_boundary;
7858 /* Only 64bit targets need something special. */
7859 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7860 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7862 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7863 f_fpr = DECL_CHAIN (f_gpr);
7864 f_ovf = DECL_CHAIN (f_fpr);
7865 f_sav = DECL_CHAIN (f_ovf);
7867 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7868 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7869 valist = build_va_arg_indirect_ref (valist);
7870 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7871 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7872 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7874 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7876 type = build_pointer_type (type);
7877 size = int_size_in_bytes (type);
7878 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7880 nat_mode = type_natural_mode (type, NULL);
7889 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7890 if (!TARGET_64BIT_MS_ABI)
7897 container = construct_container (nat_mode, TYPE_MODE (type),
7898 type, 0, X86_64_REGPARM_MAX,
7899 X86_64_SSE_REGPARM_MAX, intreg,
7904 /* Pull the value out of the saved registers. */
7906 addr = create_tmp_var (ptr_type_node, "addr");
7910 int needed_intregs, needed_sseregs;
7912 tree int_addr, sse_addr;
7914 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7915 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7917 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7919 need_temp = (!REG_P (container)
7920 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7921 || TYPE_ALIGN (type) > 128));
7923 /* If we are passing a structure, verify that it is a consecutive block
7924 in the register save area.  If not, we need to do moves. */
7925 if (!need_temp && !REG_P (container))
7927 /* Verify that all registers are strictly consecutive.  */
7928 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7932 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7934 rtx slot = XVECEXP (container, 0, i);
7935 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7936 || INTVAL (XEXP (slot, 1)) != i * 16)
7944 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7946 rtx slot = XVECEXP (container, 0, i);
7947 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7948 || INTVAL (XEXP (slot, 1)) != i * 8)
7960 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7961 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7964 /* First ensure that we fit completely in registers. */
7967 t = build_int_cst (TREE_TYPE (gpr),
7968 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7969 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7970 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7971 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7972 gimplify_and_add (t, pre_p);
7976 t = build_int_cst (TREE_TYPE (fpr),
7977 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7978 + X86_64_REGPARM_MAX * 8);
7979 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7980 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7981 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7982 gimplify_and_add (t, pre_p);
7985 /* Compute index to start of area used for integer regs. */
7988 /* int_addr = gpr + sav; */
7989 t = fold_build_pointer_plus (sav, gpr);
7990 gimplify_assign (int_addr, t, pre_p);
7994 /* sse_addr = fpr + sav; */
7995 t = fold_build_pointer_plus (sav, fpr);
7996 gimplify_assign (sse_addr, t, pre_p);
8000 int i, prev_size = 0;
8001 tree temp = create_tmp_var (type, "va_arg_tmp");
8004 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8005 gimplify_assign (addr, t, pre_p);
8007 for (i = 0; i < XVECLEN (container, 0); i++)
8009 rtx slot = XVECEXP (container, 0, i);
8010 rtx reg = XEXP (slot, 0);
8011 enum machine_mode mode = GET_MODE (reg);
8017 tree dest_addr, dest;
8018 int cur_size = GET_MODE_SIZE (mode);
8020 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8021 prev_size = INTVAL (XEXP (slot, 1));
8022 if (prev_size + cur_size > size)
8024 cur_size = size - prev_size;
8025 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8026 if (mode == BLKmode)
8029 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8030 if (mode == GET_MODE (reg))
8031 addr_type = build_pointer_type (piece_type);
8033 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8035 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8038 if (SSE_REGNO_P (REGNO (reg)))
8040 src_addr = sse_addr;
8041 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8045 src_addr = int_addr;
8046 src_offset = REGNO (reg) * 8;
8048 src_addr = fold_convert (addr_type, src_addr);
8049 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8051 dest_addr = fold_convert (daddr_type, addr);
8052 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8053 if (cur_size == GET_MODE_SIZE (mode))
8055 src = build_va_arg_indirect_ref (src_addr);
8056 dest = build_va_arg_indirect_ref (dest_addr);
8058 gimplify_assign (dest, src, pre_p);
8063 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8064 3, dest_addr, src_addr,
8065 size_int (cur_size));
8066 gimplify_and_add (copy, pre_p);
8068 prev_size += cur_size;
8074 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8075 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8076 gimplify_assign (gpr, t, pre_p);
8081 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8082 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8083 gimplify_assign (fpr, t, pre_p);
8086 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8088 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8091 /* ... otherwise out of the overflow area. */
8093 /* When we align a parameter on the stack for the caller, if the parameter
8094 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8095 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
8096 here with the caller. */
8097 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8098 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8099 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8101 /* Care for on-stack alignment if needed. */
8102 if (arg_boundary <= 64 || size == 0)
8106 HOST_WIDE_INT align = arg_boundary / 8;
8107 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8108 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8109 build_int_cst (TREE_TYPE (t), -align));
8112 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8113 gimplify_assign (addr, t, pre_p);
8115 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8116 gimplify_assign (unshare_expr (ovf), t, pre_p);
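/* Editor's illustration (not part of the original source): the alignment
   code above uses the standard round-up-to-a-power-of-two idiom

     aligned = (p + align - 1) & -align;

   e.g. with align == 16, p == 0x1007 becomes (0x1007 + 15) & -16
   == 0x1010, the next 16-byte boundary.  */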
8119 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8121 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8122 addr = fold_convert (ptrtype, addr);
8125 addr = build_va_arg_indirect_ref (addr);
8126 return build_va_arg_indirect_ref (addr);
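/* Editor's sketch of the overall lowering above (hypothetical pseudo-C,
   not part of the source).  The SysV x86-64 va_arg has two paths:

     if (gpr <= 48 - needed_intregs * 8
         && fpr <= 176 - needed_sseregs * 16)
       addr = reg_save_area + offset;     // fits in saved registers
     else
       addr = overflow_arg_area;          // read it from the stack

   where 48 = 6 GPRs * 8 bytes and 176 = 48 + 8 SSE regs * 16 bytes,
   matching the X86_64_REGPARM_MAX / X86_64_SSE_REGPARM_MAX tests.  */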
8129 /* Return true if OPNUM's MEM should be matched
8130 in movabs* patterns. */
8133 ix86_check_movabs (rtx insn, int opnum)
8137 set = PATTERN (insn);
8138 if (GET_CODE (set) == PARALLEL)
8139 set = XVECEXP (set, 0, 0);
8140 gcc_assert (GET_CODE (set) == SET);
8141 mem = XEXP (set, opnum);
8142 while (GET_CODE (mem) == SUBREG)
8143 mem = SUBREG_REG (mem);
8144 gcc_assert (MEM_P (mem));
8145 return volatile_ok || !MEM_VOLATILE_P (mem);
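/* Editor's note (illustrative, not in the original): the movabs*
   patterns are the x86-64 forms that embed a full 64-bit absolute
   address, e.g.

     movabs 0x1122334455667788, %rax    # moffs64 load, %rax only

   so the helper above digs the MEM out of the SET (stripping SUBREGs)
   and merely rejects volatile accesses unless volatile_ok is set.  */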
8148 /* Initialize the table of extra 80387 mathematical constants. */
8151 init_ext_80387_constants (void)
8153 static const char * cst[5] =
8155 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8156 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8157 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8158 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8159 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8163 for (i = 0; i < 5; i++)
8165 real_from_string (&ext_80387_constants_table[i], cst[i]);
8166 /* Ensure each constant is rounded to XFmode precision. */
8167 real_convert (&ext_80387_constants_table[i],
8168 XFmode, &ext_80387_constants_table[i]);
8171 ext_80387_constants_init = 1;
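/* Editor's note (not in the original source): the five strings above are
   the decimal expansions of the values loaded by the x87 constant-load
   instructions,

     fldlg2 -> log10(2)   fldln2 -> ln(2)    fldl2e -> log2(e)
     fldl2t -> log2(10)   fldpi  -> pi

   rounded here to XFmode so that compile-time comparisons against user
   constants (see standard_80387_constant_p below) are exact.  */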
8174 /* Return non-zero if the constant is something that
8175 can be loaded with a special instruction. */
8178 standard_80387_constant_p (rtx x)
8180 enum machine_mode mode = GET_MODE (x);
8184 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8187 if (x == CONST0_RTX (mode))
8189 if (x == CONST1_RTX (mode))
8192 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8194 /* For XFmode constants, try to find a special 80387 instruction when
8195 optimizing for size or on those CPUs that benefit from them. */
8197 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8201 if (! ext_80387_constants_init)
8202 init_ext_80387_constants ();
8204 for (i = 0; i < 5; i++)
8205 if (real_identical (&r, &ext_80387_constants_table[i]))
8209 /* A load of the constant -0.0 or -1.0 will be split into an
8210 fldz;fchs or fld1;fchs sequence. */
8211 if (real_isnegzero (&r))
8213 if (real_identical (&r, &dconstm1))
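/* Editor's illustration (not from the source): the split mentioned
   above turns a load of -0.0 into

     fldz          ; push +0.0
     fchs          ; flip the sign bit -> -0.0

   and a load of -1.0 into fld1; fchs, avoiding a constant-pool load.  */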
8219 /* Return the opcode of the special instruction to be used to load
8220 the constant X. */
8223 standard_80387_constant_opcode (rtx x)
8225 switch (standard_80387_constant_p (x))
8249 /* Return the CONST_DOUBLE representing the 80387 constant that is
8250 loaded by the specified special instruction. The argument IDX
8251 matches the return value from standard_80387_constant_p. */
8254 standard_80387_constant_rtx (int idx)
8258 if (! ext_80387_constants_init)
8259 init_ext_80387_constants ();
8275 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8279 /* Return 1 if X is all 0s and 2 if X is all 1s
8280 in a supported SSE/AVX vector mode. */
8283 standard_sse_constant_p (rtx x)
8285 enum machine_mode mode = GET_MODE (x);
8287 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8289 if (vector_all_ones_operand (x, mode))
8311 /* Return the opcode of the special instruction to be used to load
8312 the constant X. */
8315 standard_sse_constant_opcode (rtx insn, rtx x)
8317 switch (standard_sse_constant_p (x))
8320 switch (get_attr_mode (insn))
8323 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8324 return "%vpxor\t%0, %d0";
8326 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8327 return "%vxorpd\t%0, %d0";
8329 return "%vxorps\t%0, %d0";
8332 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8333 return "vpxor\t%x0, %x0, %x0";
8335 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8336 return "vxorpd\t%x0, %x0, %x0";
8338 return "vxorps\t%x0, %x0, %x0";
8346 return "vpcmpeqd\t%0, %0, %0";
8348 return "pcmpeqd\t%0, %0";
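/* Editor's note (illustrative, not part of the source): pcmpeqd of a
   register with itself sets every element to all ones, since x == x
   holds lane-wise, just as pxor/xorps of a register with itself yields
   all zeros.  Both idioms materialize the constant without a load.  */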
8356 /* Returns true if OP contains a symbol reference. */
8359 symbolic_reference_mentioned_p (rtx op)
8364 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8367 fmt = GET_RTX_FORMAT (GET_CODE (op));
8368 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8374 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8375 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8379 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8386 /* Return true if it is appropriate to emit `ret' instructions in the
8387 body of a function. Do this only if the epilogue is simple, needing a
8388 couple of insns. Prior to reloading, we can't tell how many registers
8389 must be saved, so return false then. Return false if there is no frame
8390 marker to de-allocate. */
8393 ix86_can_use_return_insn_p (void)
8395 struct ix86_frame frame;
8397 if (! reload_completed || frame_pointer_needed)
8400 /* Don't allow more than 32k pop, since that's all we can do
8401 with one instruction. */
8402 if (crtl->args.pops_args && crtl->args.size >= 32768)
8405 ix86_compute_frame_layout (&frame);
8406 return (frame.stack_pointer_offset == UNITS_PER_WORD
8407 && (frame.nregs + frame.nsseregs) == 0);
8410 /* Value should be nonzero if functions must have frame pointers.
8411 Zero means the frame pointer need not be set up (and parms may
8412 be accessed via the stack pointer) in functions that seem suitable. */
8415 ix86_frame_pointer_required (void)
8417 /* If we accessed previous frames, then the generated code expects
8418 to be able to access the saved ebp value in our frame. */
8419 if (cfun->machine->accesses_prev_frame)
8422 /* Several x86 OSes need a frame pointer for other reasons,
8423 usually pertaining to setjmp. */
8424 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8427 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
8428 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
8431 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
8432 stack allocation is 4GB. */
8433 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
8436 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8437 turns off the frame pointer by default. Turn it back on now if
8438 we've not got a leaf function. */
8439 if (TARGET_OMIT_LEAF_FRAME_POINTER
8440 && (!current_function_is_leaf
8441 || ix86_current_function_calls_tls_descriptor))
8444 if (crtl->profile && !flag_fentry)
8450 /* Record that the current function accesses previous call frames. */
8453 ix86_setup_frame_addresses (void)
8455 cfun->machine->accesses_prev_frame = 1;
8458 #ifndef USE_HIDDEN_LINKONCE
8459 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8460 # define USE_HIDDEN_LINKONCE 1
8462 # define USE_HIDDEN_LINKONCE 0
8466 static int pic_labels_used;
8468 /* Fills in the label name that should be used for a pc thunk for
8469 the given register. */
8472 get_pc_thunk_name (char name[32], unsigned int regno)
8474 gcc_assert (!TARGET_64BIT);
8476 if (USE_HIDDEN_LINKONCE)
8477 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8479 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
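/* Editor's illustration (not part of the source): for regno == BX_REG
   the thunk named above expands to

     __x86.get_pc_thunk.bx:
             movl    (%esp), %ebx      # return address = PC after call
             ret

   so "call __x86.get_pc_thunk.bx" leaves the program counter in %ebx
   for subsequent PIC address computations.  */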
8483 /* This function generates code for -fpic that loads %ebx with
8484 the return address of the caller and then returns. */
8487 ix86_code_end (void)
8492 for (regno = AX_REG; regno <= SP_REG; regno++)
8497 if (!(pic_labels_used & (1 << regno)))
8500 get_pc_thunk_name (name, regno);
8502 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8503 get_identifier (name),
8504 build_function_type_list (void_type_node, NULL_TREE));
8505 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8506 NULL_TREE, void_type_node);
8507 TREE_PUBLIC (decl) = 1;
8508 TREE_STATIC (decl) = 1;
8513 switch_to_section (darwin_sections[text_coal_section]);
8514 fputs ("\t.weak_definition\t", asm_out_file);
8515 assemble_name (asm_out_file, name);
8516 fputs ("\n\t.private_extern\t", asm_out_file);
8517 assemble_name (asm_out_file, name);
8518 putc ('\n', asm_out_file);
8519 ASM_OUTPUT_LABEL (asm_out_file, name);
8520 DECL_WEAK (decl) = 1;
8524 if (USE_HIDDEN_LINKONCE)
8526 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8528 targetm.asm_out.unique_section (decl, 0);
8529 switch_to_section (get_named_section (decl, NULL, 0));
8531 targetm.asm_out.globalize_label (asm_out_file, name);
8532 fputs ("\t.hidden\t", asm_out_file);
8533 assemble_name (asm_out_file, name);
8534 putc ('\n', asm_out_file);
8535 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8539 switch_to_section (text_section);
8540 ASM_OUTPUT_LABEL (asm_out_file, name);
8543 DECL_INITIAL (decl) = make_node (BLOCK);
8544 current_function_decl = decl;
8545 init_function_start (decl);
8546 first_function_block_is_cold = false;
8547 /* Make sure unwind info is emitted for the thunk if needed. */
8548 final_start_function (emit_barrier (), asm_out_file, 1);
8550 /* Pad stack IP move with 4 instructions (two NOPs count
8551 as one instruction). */
8552 if (TARGET_PAD_SHORT_FUNCTION)
8557 fputs ("\tnop\n", asm_out_file);
8560 xops[0] = gen_rtx_REG (Pmode, regno);
8561 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8562 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8563 fputs ("\tret\n", asm_out_file);
8564 final_end_function ();
8565 init_insn_lengths ();
8566 free_after_compilation (cfun);
8568 current_function_decl = NULL;
8571 if (flag_split_stack)
8572 file_end_indicate_split_stack ();
8575 /* Emit code for the SET_GOT patterns. */
8578 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8584 if (TARGET_VXWORKS_RTP && flag_pic)
8586 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8587 xops[2] = gen_rtx_MEM (Pmode,
8588 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8589 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8591 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8592 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8593 an unadorned address. */
8594 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8595 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8596 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8600 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8604 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8606 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8609 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8610 is what will be referenced by the Mach-O PIC subsystem. */
8612 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8615 targetm.asm_out.internal_label (asm_out_file, "L",
8616 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8621 get_pc_thunk_name (name, REGNO (dest));
8622 pic_labels_used |= 1 << REGNO (dest);
8624 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8625 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8626 output_asm_insn ("call\t%X2", xops);
8627 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8628 is what will be referenced by the Mach-O PIC subsystem. */
8631 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8633 targetm.asm_out.internal_label (asm_out_file, "L",
8634 CODE_LABEL_NUMBER (label));
8639 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8644 /* Generate a "push" pattern for input ARG. */
8649 struct machine_function *m = cfun->machine;
8651 if (m->fs.cfa_reg == stack_pointer_rtx)
8652 m->fs.cfa_offset += UNITS_PER_WORD;
8653 m->fs.sp_offset += UNITS_PER_WORD;
8655 return gen_rtx_SET (VOIDmode,
8657 gen_rtx_PRE_DEC (Pmode,
8658 stack_pointer_rtx)),
8662 /* Generate a "pop" pattern for input ARG. */
8667 return gen_rtx_SET (VOIDmode,
8670 gen_rtx_POST_INC (Pmode,
8671 stack_pointer_rtx)));
8674 /* Return >= 0 if there is an unused call-clobbered register available
8675 for the entire function. */
8678 ix86_select_alt_pic_regnum (void)
8680 if (current_function_is_leaf
8682 && !ix86_current_function_calls_tls_descriptor)
8685 /* Can't use the same register for both PIC and DRAP. */
8687 drap = REGNO (crtl->drap_reg);
8690 for (i = 2; i >= 0; --i)
8691 if (i != drap && !df_regs_ever_live_p (i))
8695 return INVALID_REGNUM;
8698 /* Return TRUE if we need to save REGNO. */
8701 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8703 if (pic_offset_table_rtx
8704 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8705 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8707 || crtl->calls_eh_return
8708 || crtl->uses_const_pool))
8709 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8711 if (crtl->calls_eh_return && maybe_eh_return)
8716 unsigned test = EH_RETURN_DATA_REGNO (i);
8717 if (test == INVALID_REGNUM)
8724 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8727 return (df_regs_ever_live_p (regno)
8728 && !call_used_regs[regno]
8729 && !fixed_regs[regno]
8730 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8733 /* Return the number of saved general purpose registers. */
8736 ix86_nsaved_regs (void)
8741 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8742 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8747 /* Return the number of saved SSE registers. */
8750 ix86_nsaved_sseregs (void)
8755 if (!TARGET_64BIT_MS_ABI)
8757 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8758 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8763 /* Given FROM and TO register numbers, say whether this elimination is
8764 allowed. If stack alignment is needed, we can only replace argument
8765 pointer with hard frame pointer, or replace frame pointer with stack
8766 pointer. Otherwise, frame pointer elimination is automatically
8767 handled and all other eliminations are valid. */
8770 ix86_can_eliminate (const int from, const int to)
8772 if (stack_realign_fp)
8773 return ((from == ARG_POINTER_REGNUM
8774 && to == HARD_FRAME_POINTER_REGNUM)
8775 || (from == FRAME_POINTER_REGNUM
8776 && to == STACK_POINTER_REGNUM));
8778 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8781 /* Return the offset between two registers, one to be eliminated, and the other
8782 its replacement, at the start of a routine. */
8785 ix86_initial_elimination_offset (int from, int to)
8787 struct ix86_frame frame;
8788 ix86_compute_frame_layout (&frame);
8790 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8791 return frame.hard_frame_pointer_offset;
8792 else if (from == FRAME_POINTER_REGNUM
8793 && to == HARD_FRAME_POINTER_REGNUM)
8794 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8797 gcc_assert (to == STACK_POINTER_REGNUM);
8799 if (from == ARG_POINTER_REGNUM)
8800 return frame.stack_pointer_offset;
8802 gcc_assert (from == FRAME_POINTER_REGNUM);
8803 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8807 /* In a dynamically-aligned function, we can't know the offset from
8808 stack pointer to frame pointer, so we must ensure that setjmp
8809 eliminates fp against the hard fp (%ebp) rather than trying to
8810 index from %esp up to the top of the frame across a gap that is
8811 of unknown (at compile-time) size. */
8813 ix86_builtin_setjmp_frame_value (void)
8815 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8818 /* When using -fsplit-stack, the allocation routines set a field in
8819 the TCB to the bottom of the stack plus this much space, measured
8820 from the stack pointer. */
8822 #define SPLIT_STACK_AVAILABLE 256
8824 /* Fill structure ix86_frame about frame of currently computed function. */
8827 ix86_compute_frame_layout (struct ix86_frame *frame)
8829 unsigned int stack_alignment_needed;
8830 HOST_WIDE_INT offset;
8831 unsigned int preferred_alignment;
8832 HOST_WIDE_INT size = get_frame_size ();
8833 HOST_WIDE_INT to_allocate;
8835 frame->nregs = ix86_nsaved_regs ();
8836 frame->nsseregs = ix86_nsaved_sseregs ();
8838 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8839 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8841 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
8842 except for function prologues and leaf functions. */
8843 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8844 && (!current_function_is_leaf || cfun->calls_alloca != 0
8845 || ix86_current_function_calls_tls_descriptor))
8847 preferred_alignment = 16;
8848 stack_alignment_needed = 16;
8849 crtl->preferred_stack_boundary = 128;
8850 crtl->stack_alignment_needed = 128;
8853 gcc_assert (!size || stack_alignment_needed);
8854 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8855 gcc_assert (preferred_alignment <= stack_alignment_needed);
8857 /* For SEH we have to limit the amount of code movement into the prologue.
8858 At present we do this via a BLOCKAGE, at which point there's very little
8859 scheduling that can be done, which means that there's very little point
8860 in doing anything except PUSHs. */
8862 cfun->machine->use_fast_prologue_epilogue = false;
8864 /* During reload iterations the number of registers saved can change.
8865 Recompute the value as needed. Do not recompute when the number of
8866 registers didn't change, as reload does multiple calls to the function
8867 and does not expect the decision to change within a single iteration. */
8868 else if (!optimize_function_for_size_p (cfun)
8869 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8871 int count = frame->nregs;
8872 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8874 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8876 /* The fast prologue uses move instead of push to save registers. This
8877 is significantly longer, but it also executes faster, as modern hardware
8878 can execute the moves in parallel but can't do that for push/pop.
8880 Be careful about choosing which prologue to emit: when the function takes
8881 many instructions to execute, we may use the slow version, as well as
8882 when the function is known to be outside a hot spot (this is known with
8883 feedback only). Weight the size of the function by the number of registers
8884 to save, as it is cheap to use one or two push instructions but very
8885 slow to use many of them. */
8887 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8888 if (node->frequency < NODE_FREQUENCY_NORMAL
8889 || (flag_branch_probabilities
8890 && node->frequency < NODE_FREQUENCY_HOT))
8891 cfun->machine->use_fast_prologue_epilogue = false;
8893 cfun->machine->use_fast_prologue_epilogue
8894 = !expensive_function_p (count);
8897 frame->save_regs_using_mov
8898 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8899 /* If static stack checking is enabled and done with probes,
8900 the registers need to be saved before allocating the frame. */
8901 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8903 /* Skip return address. */
8904 offset = UNITS_PER_WORD;
8906 /* Skip pushed static chain. */
8907 if (ix86_static_chain_on_stack)
8908 offset += UNITS_PER_WORD;
8910 /* Skip saved base pointer. */
8911 if (frame_pointer_needed)
8912 offset += UNITS_PER_WORD;
8913 frame->hfp_save_offset = offset;
8915 /* The traditional frame pointer location is at the top of the frame. */
8916 frame->hard_frame_pointer_offset = offset;
8918 /* Register save area */
8919 offset += frame->nregs * UNITS_PER_WORD;
8920 frame->reg_save_offset = offset;
8922 /* On SEH target, registers are pushed just before the frame pointer
8923 location. */
8924 if (TARGET_SEH)
8925 frame->hard_frame_pointer_offset = offset;
8927 /* Align and set SSE register save area. */
8928 if (frame->nsseregs)
8930 /* The only ABI that has saved SSE registers (Win64) also has a
8931 16-byte aligned default stack, and thus we don't need to be
8932 within the re-aligned local stack frame to save them. */
8933 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8934 offset = (offset + 16 - 1) & -16;
8935 offset += frame->nsseregs * 16;
8937 frame->sse_reg_save_offset = offset;
8939 /* The re-aligned stack starts here. Values before this point are not
8940 directly comparable with values below this point. In order to make
8941 sure that no value happens to be the same before and after, force
8942 the alignment computation below to add a non-zero value. */
8943 if (stack_realign_fp)
8944 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8947 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8948 offset += frame->va_arg_size;
8950 /* Align start of frame for local function. */
8951 if (stack_realign_fp
8952 || offset != frame->sse_reg_save_offset
8954 || !current_function_is_leaf
8955 || cfun->calls_alloca
8956 || ix86_current_function_calls_tls_descriptor)
8957 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8959 /* Frame pointer points here. */
8960 frame->frame_pointer_offset = offset;
8964 /* Add the outgoing arguments area. It can be skipped if we eliminated
8965 all the function calls as dead code.
8966 Skipping is, however, impossible when the function calls alloca: the
8967 alloca expander assumes that the last crtl->outgoing_args_size bytes
8968 of the stack frame are unused. */
8969 if (ACCUMULATE_OUTGOING_ARGS
8970 && (!current_function_is_leaf || cfun->calls_alloca
8971 || ix86_current_function_calls_tls_descriptor))
8973 offset += crtl->outgoing_args_size;
8974 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8977 frame->outgoing_arguments_size = 0;
8979 /* Align the stack boundary. Only needed if we're calling another
8980 function or using alloca. */
8981 if (!current_function_is_leaf || cfun->calls_alloca
8982 || ix86_current_function_calls_tls_descriptor)
8983 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8985 /* We've reached the end of the stack frame. */
8986 frame->stack_pointer_offset = offset;
8988 /* Size prologue needs to allocate. */
8989 to_allocate = offset - frame->sse_reg_save_offset;
8991 if ((!to_allocate && frame->nregs <= 1)
8992 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8993 frame->save_regs_using_mov = false;
8995 if (ix86_using_red_zone ()
8996 && current_function_sp_is_unchanging
8997 && current_function_is_leaf
8998 && !ix86_current_function_calls_tls_descriptor)
9000 frame->red_zone_size = to_allocate;
9001 if (frame->save_regs_using_mov)
9002 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9003 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9004 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9007 frame->red_zone_size = 0;
9008 frame->stack_pointer_offset -= frame->red_zone_size;
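/* Editor's worked example (not in the original): on x86-64 the red
   zone is the 128 bytes below the stack pointer that interrupt and
   signal handlers must preserve.  A leaf function needing, say, 40
   bytes of locals plus two 8-byte register saves fits its 56 bytes
   entirely in the red zone and can skip the explicit allocation.  */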
9010 /* The SEH frame pointer location is near the bottom of the frame.
9011 This is enforced by the fact that the difference between the
9012 stack pointer and the frame pointer is limited to 240 bytes in
9013 the unwind data structure. */
9018 /* If we can leave the frame pointer where it is, do so. Also, this
9019 returns the establisher frame for __builtin_frame_address (0). */
9020 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9021 if (diff <= SEH_MAX_FRAME_SIZE
9022 && (diff > 240 || (diff & 15) != 0)
9023 && !crtl->accesses_prior_frames)
9025 /* Ideally we'd determine what portion of the local stack frame
9026 (within the constraint of the lowest 240) is most heavily used.
9027 But without that complication, simply bias the frame pointer
9028 by 128 bytes so as to maximize the amount of the local stack
9029 frame that is addressable with 8-bit offsets. */
9030 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
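/* Editor's illustration (not from the source): signed 8-bit
   displacements reach [fp-128, fp+127].  Placing the frame pointer
   128 bytes above the final stack pointer therefore makes the whole
   256-byte window [sp, sp+255] addressable with one-byte offsets.  */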
9035 /* This is semi-inlined memory_address_length, but simplified
9036 since we know that we're always dealing with reg+offset, and
9037 to avoid having to create and discard all that rtl. */
9040 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9046 /* EBP and R13 cannot be encoded without an offset. */
9047 len = (regno == BP_REG || regno == R13_REG);
9049 else if (IN_RANGE (offset, -128, 127))
9052 /* ESP and R12 must be encoded with a SIB byte. */
9053 if (regno == SP_REG || regno == R12_REG)
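/* Editor's worked examples (not part of the source): 0(%ebp) still
   needs a zero disp8, so len = 1; 16(%eax) needs only a disp8, so
   len = 1; -4(%esp) needs a SIB byte plus a disp8, so len = 2; and
   1024(%eax) falls back to a four-byte disp32.  */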
9059 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9060 The valid base registers are taken from CFUN->MACHINE->FS. */
9063 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9065 const struct machine_function *m = cfun->machine;
9066 rtx base_reg = NULL;
9067 HOST_WIDE_INT base_offset = 0;
9069 if (m->use_fast_prologue_epilogue)
9071 /* Choose the base register most likely to allow the most scheduling
9072 opportunities. Generally FP is valid throughout the function,
9073 while DRAP must be reloaded within the epilogue. But choose either
9074 over the SP due to increased encoding size. */
9078 base_reg = hard_frame_pointer_rtx;
9079 base_offset = m->fs.fp_offset - cfa_offset;
9081 else if (m->fs.drap_valid)
9083 base_reg = crtl->drap_reg;
9084 base_offset = 0 - cfa_offset;
9086 else if (m->fs.sp_valid)
9088 base_reg = stack_pointer_rtx;
9089 base_offset = m->fs.sp_offset - cfa_offset;
9094 HOST_WIDE_INT toffset;
9097 /* Choose the base register with the smallest address encoding.
9098 With a tie, choose FP > DRAP > SP. */
9101 base_reg = stack_pointer_rtx;
9102 base_offset = m->fs.sp_offset - cfa_offset;
9103 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9105 if (m->fs.drap_valid)
9107 toffset = 0 - cfa_offset;
9108 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9111 base_reg = crtl->drap_reg;
9112 base_offset = toffset;
9118 toffset = m->fs.fp_offset - cfa_offset;
9119 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9122 base_reg = hard_frame_pointer_rtx;
9123 base_offset = toffset;
9128 gcc_assert (base_reg != NULL);
9130 return plus_constant (base_reg, base_offset);
9133 /* Emit code to save registers in the prologue. */
9136 ix86_emit_save_regs (void)
9141 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9142 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9144 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9145 RTX_FRAME_RELATED_P (insn) = 1;
9149 /* Emit a single register save at CFA - CFA_OFFSET. */
9152 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9153 HOST_WIDE_INT cfa_offset)
9155 struct machine_function *m = cfun->machine;
9156 rtx reg = gen_rtx_REG (mode, regno);
9157 rtx mem, addr, base, insn;
9159 addr = choose_baseaddr (cfa_offset);
9160 mem = gen_frame_mem (mode, addr);
9162 /* For SSE saves, we need to indicate the 128-bit alignment. */
9163 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9165 insn = emit_move_insn (mem, reg);
9166 RTX_FRAME_RELATED_P (insn) = 1;
9169 if (GET_CODE (base) == PLUS)
9170 base = XEXP (base, 0);
9171 gcc_checking_assert (REG_P (base));
9173 /* When saving registers into a re-aligned local stack frame, avoid
9174 any tricky guessing by dwarf2out. */
9175 if (m->fs.realigned)
9177 gcc_checking_assert (stack_realign_drap);
9179 if (regno == REGNO (crtl->drap_reg))
9181 /* A bit of a hack. We force the DRAP register to be saved in
9182 the re-aligned stack frame, which provides us with a copy
9183 of the CFA that will last past the prologue. Install it. */
9184 gcc_checking_assert (cfun->machine->fs.fp_valid);
9185 addr = plus_constant (hard_frame_pointer_rtx,
9186 cfun->machine->fs.fp_offset - cfa_offset);
9187 mem = gen_rtx_MEM (mode, addr);
9188 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9192 /* The frame pointer is a stable reference within the
9193 aligned frame. Use it. */
9194 gcc_checking_assert (cfun->machine->fs.fp_valid);
9195 addr = plus_constant (hard_frame_pointer_rtx,
9196 cfun->machine->fs.fp_offset - cfa_offset);
9197 mem = gen_rtx_MEM (mode, addr);
9198 add_reg_note (insn, REG_CFA_EXPRESSION,
9199 gen_rtx_SET (VOIDmode, mem, reg));
9203 /* The memory may not be relative to the current CFA register,
9204 which means that we may need to generate a new pattern for
9205 use by the unwind info. */
9206 else if (base != m->fs.cfa_reg)
9208 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9209 mem = gen_rtx_MEM (mode, addr);
9210 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9214 /* Emit code to save registers using MOV insns.
9215 First register is stored at CFA - CFA_OFFSET. */
9217 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9221 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9222 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9224 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9225 cfa_offset -= UNITS_PER_WORD;
9229 /* Emit code to save SSE registers using MOV insns.
9230 First register is stored at CFA - CFA_OFFSET. */
9232 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9236 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9237 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9239 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9244 static GTY(()) rtx queued_cfa_restores;
9246 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next
9247 stack manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9248 Don't add the note if the previously saved value will be left untouched
9249 within the stack red zone till return, as unwinders can find the same
9250 value in the register and on the stack. */
9253 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9255 if (!crtl->shrink_wrapped
9256 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9261 add_reg_note (insn, REG_CFA_RESTORE, reg);
9262 RTX_FRAME_RELATED_P (insn) = 1;
9266 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9269 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9272 ix86_add_queued_cfa_restore_notes (rtx insn)
9275 if (!queued_cfa_restores)
9277 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9279 XEXP (last, 1) = REG_NOTES (insn);
9280 REG_NOTES (insn) = queued_cfa_restores;
9281 queued_cfa_restores = NULL_RTX;
9282 RTX_FRAME_RELATED_P (insn) = 1;
9285 /* Expand a prologue or epilogue stack adjustment.
9286 The pattern exists to put a dependency on all ebp-based memory accesses.
9287 STYLE should be negative if instructions should be marked as frame related,
9288 zero if the %r11 register is live and cannot be freely used, and positive
9289 otherwise. */
9292 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9293 int style, bool set_cfa)
9295 struct machine_function *m = cfun->machine;
9297 bool add_frame_related_expr = false;
9300 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9301 else if (x86_64_immediate_operand (offset, DImode))
9302 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9306 /* r11 is used by the indirect sibcall return as well; it is set before
9307 the epilogue and used after the epilogue. */
9309 tmp = gen_rtx_REG (DImode, R11_REG);
9312 gcc_assert (src != hard_frame_pointer_rtx
9313 && dest != hard_frame_pointer_rtx);
9314 tmp = hard_frame_pointer_rtx;
9316 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9318 add_frame_related_expr = true;
9320 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9323 insn = emit_insn (insn);
9325 ix86_add_queued_cfa_restore_notes (insn);
9331 gcc_assert (m->fs.cfa_reg == src);
9332 m->fs.cfa_offset += INTVAL (offset);
9333 m->fs.cfa_reg = dest;
9335 r = gen_rtx_PLUS (Pmode, src, offset);
9336 r = gen_rtx_SET (VOIDmode, dest, r);
9337 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9338 RTX_FRAME_RELATED_P (insn) = 1;
9342 RTX_FRAME_RELATED_P (insn) = 1;
9343 if (add_frame_related_expr)
9345 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9346 r = gen_rtx_SET (VOIDmode, dest, r);
9347 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9351 if (dest == stack_pointer_rtx)
9353 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9354 bool valid = m->fs.sp_valid;
9356 if (src == hard_frame_pointer_rtx)
9358 valid = m->fs.fp_valid;
9359 ooffset = m->fs.fp_offset;
9361 else if (src == crtl->drap_reg)
9363 valid = m->fs.drap_valid;
9368 /* Else there are two possibilities: SP itself, which we set
9369 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9370 taken care of by hand along the eh_return path. */
9371 gcc_checking_assert (src == stack_pointer_rtx
9372 || offset == const0_rtx);
9375 m->fs.sp_offset = ooffset - INTVAL (offset);
9376 m->fs.sp_valid = valid;
9380 /* Find an available register to be used as the dynamic realign argument
9381 pointer register. Such a register will be written in the prologue and
9382 used at the beginning of the body, so it must not be
9383 1. a parameter passing register;
9384 2. the GOT pointer.
9385 We reuse the static-chain register if it is available. Otherwise, we
9386 use DI for i386 and R13 for x86-64. We chose R13 since it has a
9387 longer encoding.
9389 Return: the regno of the chosen register. */
9392 find_drap_reg (void)
9394 tree decl = cfun->decl;
9398 /* Use R13 for a nested function or a function that needs a static chain.
9399 Since a function with a tail call may use any caller-saved
9400 registers in the epilogue, DRAP must not use a caller-saved
9401 register in such a case. */
9402 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9409 /* Use DI for a nested function or a function that needs a static chain.
9410 Since a function with a tail call may use any caller-saved
9411 registers in the epilogue, DRAP must not use a caller-saved
9412 register in such a case. */
9413 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9416 /* Reuse the static chain register if it isn't used for parameter
9417 passing. */
9418 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9420 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9421 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9428 /* Return minimum incoming stack alignment. */
9431 ix86_minimum_incoming_stack_boundary (bool sibcall)
9433 unsigned int incoming_stack_boundary;
9435 /* Prefer the one specified at command line. */
9436 if (ix86_user_incoming_stack_boundary)
9437 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9438 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9439 if -mstackrealign is used, this isn't a sibcall check, and the
9440 estimated stack alignment is 128 bits. */
9443 && ix86_force_align_arg_pointer
9444 && crtl->stack_alignment_estimated == 128)
9445 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9447 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9449 /* Incoming stack alignment can be changed on individual functions
9450 via force_align_arg_pointer attribute. We use the smallest
9451 incoming stack boundary. */
9452 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9453 && lookup_attribute (ix86_force_align_arg_pointer_string,
9454 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9455 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9457 /* The incoming stack frame has to be aligned at least at
9458 parm_stack_boundary. */
9459 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9460 incoming_stack_boundary = crtl->parm_stack_boundary;
9462 /* The stack at the entrance of main is aligned by the runtime. We use
9463 the smallest incoming stack boundary. */
9464 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9465 && DECL_NAME (current_function_decl)
9466 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9467 && DECL_FILE_SCOPE_P (current_function_decl))
9468 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9470 return incoming_stack_boundary;
9473 /* Update incoming stack boundary and estimated stack alignment. */
9476 ix86_update_stack_boundary (void)
9478 ix86_incoming_stack_boundary
9479 = ix86_minimum_incoming_stack_boundary (false);
9481 /* x86_64 varargs need a 16-byte stack alignment for the register
9482 save area. */
9485 && crtl->stack_alignment_estimated < 128)
9486 crtl->stack_alignment_estimated = 128;
9489 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9490 needed or an rtx for DRAP otherwise. */
9493 ix86_get_drap_rtx (void)
9495 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9496 crtl->need_drap = true;
9498 if (stack_realign_drap)
9500 /* Assign DRAP to vDRAP and return vDRAP. */
9501 unsigned int regno = find_drap_reg ();
9506 arg_ptr = gen_rtx_REG (Pmode, regno);
9507 crtl->drap_reg = arg_ptr;
9510 drap_vreg = copy_to_reg (arg_ptr);
9514 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9517 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9518 RTX_FRAME_RELATED_P (insn) = 1;
9526 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9529 ix86_internal_arg_pointer (void)
9531 return virtual_incoming_args_rtx;
9534 struct scratch_reg {
9539 /* Return a short-lived scratch register for use on function entry.
9540 In 32-bit mode, it is valid only after the registers are saved
9541 in the prologue. This register must be released by means of
9542 release_scratch_register_on_entry once it is dead. */
9545 get_scratch_register_on_entry (struct scratch_reg *sr)
9553 /* We always use R11 in 64-bit mode. */
9558 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9560 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9561 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9562 int regparm = ix86_function_regparm (fntype, decl);
9564 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9566 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9567 for the static chain register. */
9568 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9569 && drap_regno != AX_REG)
9571 else if (regparm < 2 && drap_regno != DX_REG)
9573 /* ecx is the static chain register. */
9574 else if (regparm < 3 && !fastcall_p && !static_chain_p
9575 && drap_regno != CX_REG)
9577 else if (ix86_save_reg (BX_REG, true))
9579 /* esi is the static chain register. */
9580 else if (!(regparm == 3 && static_chain_p)
9581 && ix86_save_reg (SI_REG, true))
9583 else if (ix86_save_reg (DI_REG, true))
9587 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9592 sr->reg = gen_rtx_REG (Pmode, regno);
9595 rtx insn = emit_insn (gen_push (sr->reg));
9596 RTX_FRAME_RELATED_P (insn) = 1;
9600 /* Release a scratch register obtained from the preceding function. */
9603 release_scratch_register_on_entry (struct scratch_reg *sr)
9607 rtx x, insn = emit_insn (gen_pop (sr->reg));
9609 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9610 RTX_FRAME_RELATED_P (insn) = 1;
9611 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9612 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9613 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9617 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9619 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9622 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9624 /* We skip the probe for the first interval + a small dope of 4 words and
9625 probe that many bytes past the specified size to maintain a protection
9626 area at the bottom of the stack. */
9627 const int dope = 4 * UNITS_PER_WORD;
9628 rtx size_rtx = GEN_INT (size), last;
9630 /* See if we have a constant small number of probes to generate. If so,
9631 that's the easy case. The run-time loop is made up of 11 insns in the
9632 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9633 for n intervals. */
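/* Editor's worked example (not in the original): for
   size == 3 * PROBE_INTERVAL the unrolled form costs 3 + 2*(3-1) = 7
   insns, beating the 11-insn run-time loop; at 5 intervals the two
   tie at 11 insns, hence the cutoff below.  */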
9634 if (size <= 5 * PROBE_INTERVAL)
9636 HOST_WIDE_INT i, adjust;
9637 bool first_probe = true;
9639 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9640 values of N from 1 until it exceeds SIZE. If only one probe is
9641 needed, this will not generate any code. Then adjust and probe
9642 to PROBE_INTERVAL + SIZE. */
9643 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9647 adjust = 2 * PROBE_INTERVAL + dope;
9648 first_probe = false;
9651 adjust = PROBE_INTERVAL;
9653 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9654 plus_constant (stack_pointer_rtx, -adjust)));
9655 emit_stack_probe (stack_pointer_rtx);
9659 adjust = size + PROBE_INTERVAL + dope;
9661 adjust = size + PROBE_INTERVAL - i;
9663 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9664 plus_constant (stack_pointer_rtx, -adjust)));
9665 emit_stack_probe (stack_pointer_rtx);
9667 /* Adjust back to account for the additional first interval. */
9668 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9669 plus_constant (stack_pointer_rtx,
9670 PROBE_INTERVAL + dope)));
9673 /* Otherwise, do the same as above, but in a loop. Note that we must be
9674 extra careful with variables wrapping around because we might be at
9675 the very top (or the very bottom) of the address space and we have
9676 to be able to handle this case properly; in particular, we use an
9677 equality test for the loop condition. */
9680 HOST_WIDE_INT rounded_size;
9681 struct scratch_reg sr;
9683 get_scratch_register_on_entry (&sr);
9686 /* Step 1: round SIZE to the previous multiple of the interval. */
9688 rounded_size = size & -PROBE_INTERVAL;
9691 /* Step 2: compute initial and final value of the loop counter. */
9693 /* SP = SP_0 + PROBE_INTERVAL. */
9694 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9695 plus_constant (stack_pointer_rtx,
9696 - (PROBE_INTERVAL + dope))));
9698 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9699 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9700 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9701 gen_rtx_PLUS (Pmode, sr.reg,
9702 stack_pointer_rtx)));
9707 while (SP != LAST_ADDR)
9709 SP = SP + PROBE_INTERVAL
9713 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9714 values of N from 1 until it is equal to ROUNDED_SIZE. */
9716 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9719 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9720 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9722 if (size != rounded_size)
9724 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9725 plus_constant (stack_pointer_rtx,
9726 rounded_size - size)));
9727 emit_stack_probe (stack_pointer_rtx);
9730 /* Adjust back to account for the additional first interval. */
9731 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9732 plus_constant (stack_pointer_rtx,
9733 PROBE_INTERVAL + dope)));
9735 release_scratch_register_on_entry (&sr);
9738 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9740 /* Even if the stack pointer isn't the CFA register, we need to correctly
9741 describe the adjustments made to it, in particular differentiate the
9742 frame-related ones from the frame-unrelated ones. */
9745 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9746 XVECEXP (expr, 0, 0)
9747 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9748 plus_constant (stack_pointer_rtx, -size));
9749 XVECEXP (expr, 0, 1)
9750 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9751 plus_constant (stack_pointer_rtx,
9752 PROBE_INTERVAL + dope + size));
9753 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9754 RTX_FRAME_RELATED_P (last) = 1;
9756 cfun->machine->fs.sp_offset += size;
9759 /* Make sure nothing is scheduled before we are done. */
9760 emit_insn (gen_blockage ());
9763 /* Adjust the stack pointer up to REG while probing it. */
9766 output_adjust_stack_and_probe (rtx reg)
9768 static int labelno = 0;
9769 char loop_lab[32], end_lab[32];
9772 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9773 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9775 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9777 /* Jump to END_LAB if SP == LAST_ADDR. */
9778 xops[0] = stack_pointer_rtx;
9780 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9781 fputs ("\tje\t", asm_out_file);
9782 assemble_name_raw (asm_out_file, end_lab);
9783 fputc ('\n', asm_out_file);
9785 /* SP = SP + PROBE_INTERVAL. */
9786 xops[1] = GEN_INT (PROBE_INTERVAL);
9787 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9790 xops[1] = const0_rtx;
9791 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9793 fprintf (asm_out_file, "\tjmp\t");
9794 assemble_name_raw (asm_out_file, loop_lab);
9795 fputc ('\n', asm_out_file);
9797 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9802 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9803 inclusive. These are offsets from the current stack pointer. */
9806 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9808 /* See if we have a constant small number of probes to generate. If so,
9809 that's the easy case. The run-time loop is made up of 7 insns in the
9810 generic case while the compile-time loop is made up of n insns for n
9811 intervals. */
9812 if (size <= 7 * PROBE_INTERVAL)
9816 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9817 it exceeds SIZE. If only one probe is needed, this will not
9818 generate any code. Then probe at FIRST + SIZE. */
9819 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9820 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9822 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9825 /* Otherwise, do the same as above, but in a loop. Note that we must be
9826 extra careful with variables wrapping around because we might be at
9827 the very top (or the very bottom) of the address space and we have
9828 to be able to handle this case properly; in particular, we use an
9829 equality test for the loop condition. */
9832 HOST_WIDE_INT rounded_size, last;
9833 struct scratch_reg sr;
9835 get_scratch_register_on_entry (&sr);
9838 /* Step 1: round SIZE to the previous multiple of the interval. */
9840 rounded_size = size & -PROBE_INTERVAL;
9843 /* Step 2: compute initial and final value of the loop counter. */
9845 /* TEST_OFFSET = FIRST. */
9846 emit_move_insn (sr.reg, GEN_INT (-first));
9848 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9849 last = first + rounded_size;
9854 while (TEST_ADDR != LAST_ADDR)
9856 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9860 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9861 until it is equal to ROUNDED_SIZE. */
9863 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9866 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9867 that SIZE is equal to ROUNDED_SIZE. */
9869 if (size != rounded_size)
9870 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9873 rounded_size - size));
9875 release_scratch_register_on_entry (&sr);
9878 /* Make sure nothing is scheduled before we are done. */
9879 emit_insn (gen_blockage ());
9882 /* Probe a range of stack addresses from REG to END, inclusive. These are
9883 offsets from the current stack pointer. */
9886 output_probe_stack_range (rtx reg, rtx end)
9888 static int labelno = 0;
9889 char loop_lab[32], end_lab[32];
9892 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9893 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9895 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9897 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9900 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9901 fputs ("\tje\t", asm_out_file);
9902 assemble_name_raw (asm_out_file, end_lab);
9903 fputc ('\n', asm_out_file);
9905 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9906 xops[1] = GEN_INT (PROBE_INTERVAL);
9907 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9909 /* Probe at TEST_ADDR. */
9910 xops[0] = stack_pointer_rtx;
9912 xops[2] = const0_rtx;
9913 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9915 fprintf (asm_out_file, "\tjmp\t");
9916 assemble_name_raw (asm_out_file, loop_lab);
9917 fputc ('\n', asm_out_file);
9919 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9924 /* Finalize the stack_realign_needed flag, which will guide the prologue
9925 and epilogue to be generated in the correct form. */
9927 ix86_finalize_stack_realign_flags (void)
9929 /* Check if stack realignment is really needed after reload, and
9930 store the result in cfun. */
9931 unsigned int incoming_stack_boundary
9932 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9933 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9934 unsigned int stack_realign = (incoming_stack_boundary
9935 < (current_function_is_leaf
9936 ? crtl->max_used_stack_slot_alignment
9937 : crtl->stack_alignment_needed));
9939 if (crtl->stack_realign_finalized)
9941 /* After stack_realign_needed is finalized, we can no longer
9942 update it. */
9943 gcc_assert (crtl->stack_realign_needed == stack_realign);
9947 /* If the only reason for frame_pointer_needed is that we conservatively
9948 assumed stack realignment might be needed, but in the end nothing that
9949 needed the stack alignment had been spilled, clear frame_pointer_needed
9950 and say we don't need stack realignment. */
9953 && frame_pointer_needed
9954 && current_function_is_leaf
9955 && flag_omit_frame_pointer
9956 && current_function_sp_is_unchanging
9957 && !ix86_current_function_calls_tls_descriptor
9958 && !crtl->accesses_prior_frames
9959 && !cfun->calls_alloca
9960 && !crtl->calls_eh_return
9961 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
9962 && !ix86_frame_pointer_required ()
9963 && get_frame_size () == 0
9964 && ix86_nsaved_sseregs () == 0
9965 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
9967 HARD_REG_SET set_up_by_prologue, prologue_used;
9970 CLEAR_HARD_REG_SET (prologue_used);
9971 CLEAR_HARD_REG_SET (set_up_by_prologue);
9972 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
9973 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
9974 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
9975 HARD_FRAME_POINTER_REGNUM);
9979 FOR_BB_INSNS (bb, insn)
9980 if (NONDEBUG_INSN_P (insn)
9981 && requires_stack_frame_p (insn, prologue_used,
9982 set_up_by_prologue))
9984 crtl->stack_realign_needed = stack_realign;
9985 crtl->stack_realign_finalized = true;
9990 frame_pointer_needed = false;
9991 stack_realign = false;
9992 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
9993 crtl->stack_alignment_needed = incoming_stack_boundary;
9994 crtl->stack_alignment_estimated = incoming_stack_boundary;
9995 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
9996 crtl->preferred_stack_boundary = incoming_stack_boundary;
9997 df_finish_pass (true);
9998 df_scan_alloc (NULL);
10000 df_compute_regs_ever_live (true);
10004 crtl->stack_realign_needed = stack_realign;
10005 crtl->stack_realign_finalized = true;
10008 /* Expand the prologue into a bunch of separate insns. */
10011 ix86_expand_prologue (void)
10013 struct machine_function *m = cfun->machine;
10016 struct ix86_frame frame;
10017 HOST_WIDE_INT allocate;
10018 bool int_registers_saved;
10019 bool sse_registers_saved;
10021 ix86_finalize_stack_realign_flags ();
10023 /* DRAP should not coexist with stack_realign_fp. */
10024 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10026 memset (&m->fs, 0, sizeof (m->fs));
10028 /* Initialize CFA state for before the prologue. */
10029 m->fs.cfa_reg = stack_pointer_rtx;
10030 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10032 /* Track the SP offset to the CFA. We continue tracking this after we've
10033 swapped the CFA register away from SP. In the case of re-alignment
10034 this is fudged; we're interested in offsets within the local frame. */
10035 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10036 m->fs.sp_valid = true;
10038 ix86_compute_frame_layout (&frame);
10040 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10042 /* We should have already generated an error for any use of
10043 ms_hook on a nested function. */
10044 gcc_checking_assert (!ix86_static_chain_on_stack);
10046 /* Check if profiling is active and we shall use the profiling-before-
10047 prologue variant. If so, issue a sorry. */
10048 if (crtl->profile && flag_fentry != 0)
10049 sorry ("ms_hook_prologue attribute isn%'t compatible "
10050 "with -mfentry for 32-bit");
10052 /* In ix86_asm_output_function_label we emitted:
10053 8b ff movl.s %edi,%edi
10055 8b ec movl.s %esp,%ebp
10057 This matches the hookable function prologue in Win32 API
10058 functions in Microsoft Windows XP Service Pack 2 and newer.
10059 Wine uses this to enable Windows apps to hook the Win32 API
10060 functions provided by Wine.
10062 What that means is that we've already set up the frame pointer. */
10064 if (frame_pointer_needed
10065 && !(crtl->drap_reg && crtl->stack_realign_needed))
10069 /* We've decided to use the frame pointer already set up.
10070 Describe this to the unwinder by pretending that both
10071 push and mov insns happen right here.
10073 Putting the unwind info here at the end of the ms_hook
10074 is done so that we can make absolutely certain we get
10075 the required byte sequence at the start of the function,
10076 rather than relying on an assembler that can produce
10077 the exact encoding required.
10079 However it does mean (in the unpatched case) that we have
10080 a 1 insn window where the asynchronous unwind info is
10081 incorrect. However, if we placed the unwind info at
10082 its correct location we would have incorrect unwind info
10083 in the patched case. Which is probably all moot since
10084 I don't expect Wine generates dwarf2 unwind info for the
10085 system libraries that use this feature. */
10087 insn = emit_insn (gen_blockage ());
10089 push = gen_push (hard_frame_pointer_rtx);
10090 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10091 stack_pointer_rtx);
10092 RTX_FRAME_RELATED_P (push) = 1;
10093 RTX_FRAME_RELATED_P (mov) = 1;
10095 RTX_FRAME_RELATED_P (insn) = 1;
10096 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10097 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10099 /* Note that gen_push incremented m->fs.cfa_offset, even
10100 though we didn't emit the push insn here. */
10101 m->fs.cfa_reg = hard_frame_pointer_rtx;
10102 m->fs.fp_offset = m->fs.cfa_offset;
10103 m->fs.fp_valid = true;
10107 /* The frame pointer is not needed so pop %ebp again.
10108 This leaves us with a pristine state. */
10109 emit_insn (gen_pop (hard_frame_pointer_rtx));
10113 /* The first insn of a function that accepts its static chain on the
10114 stack is to push the register that would be filled in by a direct
10115 call. This insn will be skipped by the trampoline. */
10116 else if (ix86_static_chain_on_stack)
10118 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10119 emit_insn (gen_blockage ());
10121 /* We don't want to interpret this push insn as a register save,
10122 only as a stack adjustment. The real copy of the register as
10123 a save will be done later, if needed. */
10124 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10125 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10126 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10127 RTX_FRAME_RELATED_P (insn) = 1;
10130 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10131 DRAP is needed and stack realignment is really needed after reload. */
10132 if (stack_realign_drap)
10134 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10136 /* Only need to push parameter pointer reg if it is caller saved. */
10137 if (!call_used_regs[REGNO (crtl->drap_reg)])
10139 /* Push the arg pointer reg. */
10140 insn = emit_insn (gen_push (crtl->drap_reg));
10141 RTX_FRAME_RELATED_P (insn) = 1;
10144 /* Grab the argument pointer. */
10145 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10146 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10147 RTX_FRAME_RELATED_P (insn) = 1;
10148 m->fs.cfa_reg = crtl->drap_reg;
10149 m->fs.cfa_offset = 0;
10151 /* Align the stack. */
10152 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10154 GEN_INT (-align_bytes)));
10155 RTX_FRAME_RELATED_P (insn) = 1;
10157 /* Replicate the return address on the stack so that the return
10158 address can be reached via the (argp - 1) slot. This is needed
10159 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10160 expand_builtin_return_addr, etc. */
10161 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10162 t = gen_frame_mem (Pmode, t);
10163 insn = emit_insn (gen_push (t));
10164 RTX_FRAME_RELATED_P (insn) = 1;
10166 /* For the purposes of frame and register save area addressing,
10167 we've started over with a new frame. */
10168 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10169 m->fs.realigned = true;
10170 }
10172 int_registers_saved = (frame.nregs == 0);
10173 sse_registers_saved = (frame.nsseregs == 0);
10175 if (frame_pointer_needed && !m->fs.fp_valid)
10176 {
10177 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10178 slower on all targets. Also sdb doesn't like it. */
10179 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10180 RTX_FRAME_RELATED_P (insn) = 1;
10182 /* Push registers now, before setting the frame pointer
10183 on SEH target. */
10184 if (!int_registers_saved
10185 && TARGET_SEH
10186 && !frame.save_regs_using_mov)
10187 {
10188 ix86_emit_save_regs ();
10189 int_registers_saved = true;
10190 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10191 }
10193 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10194 {
10195 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10196 RTX_FRAME_RELATED_P (insn) = 1;
10198 if (m->fs.cfa_reg == stack_pointer_rtx)
10199 m->fs.cfa_reg = hard_frame_pointer_rtx;
10200 m->fs.fp_offset = m->fs.sp_offset;
10201 m->fs.fp_valid = true;
10202 }
10203 }
10205 if (!int_registers_saved)
10206 {
10207 /* If saving registers via PUSH, do so now. */
10208 if (!frame.save_regs_using_mov)
10209 {
10210 ix86_emit_save_regs ();
10211 int_registers_saved = true;
10212 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10213 }
10215 /* When using the red zone we may start register saving before allocating
10216 the stack frame, saving one cycle of the prologue. However, avoid
10217 doing this if we have to probe the stack; at least on x86_64 the
10218 stack probe can turn into a call that clobbers a red zone location. */
10219 else if (ix86_using_red_zone ()
10220 && (! TARGET_STACK_PROBE
10221 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10222 {
10223 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10224 int_registers_saved = true;
10225 }
10226 }
10228 if (stack_realign_fp)
10229 {
10230 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10231 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10233 /* The computation of the size of the re-aligned stack frame means
10234 that we must allocate the size of the register save area before
10235 performing the actual alignment. Otherwise we cannot guarantee
10236 that there's enough storage above the realignment point. */
10237 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10238 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10239 GEN_INT (m->fs.sp_offset
10240 - frame.sse_reg_save_offset),
10241 -1, false);
10243 /* Align the stack. */
10244 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10245 stack_pointer_rtx,
10246 GEN_INT (-align_bytes)));
10248 /* For the purposes of register save area addressing, the stack
10249 pointer is no longer valid. As for the value of sp_offset,
10250 see ix86_compute_frame_layout, which we need to match in order
10251 to pass verification of stack_pointer_offset at the end. */
10252 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10253 m->fs.sp_valid = false;
10254 }
10256 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10258 if (flag_stack_usage_info)
10259 {
10261 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10263 /* If it was realigned, take into account the fake frame. */
10264 if (stack_realign_drap)
10265 {
10266 if (ix86_static_chain_on_stack)
10267 stack_size += UNITS_PER_WORD;
10269 if (!call_used_regs[REGNO (crtl->drap_reg)])
10270 stack_size += UNITS_PER_WORD;
10272 /* This over-estimates by 1 minimal-stack-alignment-unit but
10273 mitigates that by counting in the new return address slot. */
10274 current_function_dynamic_stack_size
10275 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10276 }
10278 current_function_static_stack_size = stack_size;
10279 }
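/* Note: the value recorded here is what -fstack-usage reports for the
   function.  Since counting starts from ARG_POINTER, even a function
   with no locals still shows the return-address slot (for example,
   UNITS_PER_WORD bytes on a plain leaf function).  */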
10281 /* On SEH target with very large frame size, allocate an area to save
10282 SSE registers (as the very large allocation won't be described). */
10283 if (TARGET_SEH
10284 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10285 && !sse_registers_saved)
10286 {
10287 HOST_WIDE_INT sse_size =
10288 frame.sse_reg_save_offset - frame.reg_save_offset;
10290 gcc_assert (int_registers_saved);
10292 /* No need to do stack checking as the area will be immediately
10293 written. */
10294 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10295 GEN_INT (-sse_size), -1,
10296 m->fs.cfa_reg == stack_pointer_rtx);
10297 allocate -= sse_size;
10298 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10299 sse_registers_saved = true;
10300 }
10302 /* The stack has already been decremented by the instruction calling us
10303 so probe if the size is non-negative to preserve the protection area. */
10304 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10305 {
10306 /* We expect the registers to be saved when probes are used. */
10307 gcc_assert (int_registers_saved);
10309 if (STACK_CHECK_MOVING_SP)
10310 {
10311 ix86_adjust_stack_and_probe (allocate);
10312 allocate = 0;
10313 }
10314 else
10315 {
10316 HOST_WIDE_INT size = allocate;
10318 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10319 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10321 if (TARGET_STACK_PROBE)
10322 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10323 else
10324 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10325 }
10326 }
10328 if (allocate == 0)
10329 ;
10330 else if (!ix86_target_stack_probe ()
10331 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10332 {
10333 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10334 GEN_INT (-allocate), -1,
10335 m->fs.cfa_reg == stack_pointer_rtx);
10336 }
10337 else
10338 {
10339 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10340 rtx r10 = NULL;
10341 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10343 bool eax_live = false;
10344 bool r10_live = false;
10346 if (TARGET_64BIT)
10347 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10348 if (!TARGET_64BIT_MS_ABI)
10349 eax_live = ix86_eax_live_at_start_p ();
10351 if (eax_live)
10352 {
10353 emit_insn (gen_push (eax));
10354 allocate -= UNITS_PER_WORD;
10355 }
10356 if (r10_live)
10357 {
10358 r10 = gen_rtx_REG (Pmode, R10_REG);
10359 emit_insn (gen_push (r10));
10360 allocate -= UNITS_PER_WORD;
10361 }
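/* A sketch of the sequence emitted below when the worker is used
   (names illustrative; the helper is e.g. ___chkstk_ms on 64-bit
   Windows targets):
       mov  $allocate, %eax
       call ___chkstk_ms          ; probes each page, leaves %eax intact
       sub  %eax, %rsp            ; the pro_epilogue_adjust_stack_*_sub insn
   The probe call is needed because Windows commits stack one guard page
   at a time.  */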
10363 emit_move_insn (eax, GEN_INT (allocate));
10364 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10366 /* Use the fact that AX still contains ALLOCATE. */
10367 adjust_stack_insn = (TARGET_64BIT
10368 ? gen_pro_epilogue_adjust_stack_di_sub
10369 : gen_pro_epilogue_adjust_stack_si_sub);
10371 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10372 stack_pointer_rtx, eax));
10374 /* Note that SEH directives need to continue tracking the stack
10375 pointer even after the frame pointer has been set up. */
10376 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10377 {
10378 if (m->fs.cfa_reg == stack_pointer_rtx)
10379 m->fs.cfa_offset += allocate;
10381 RTX_FRAME_RELATED_P (insn) = 1;
10382 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10383 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10384 plus_constant (stack_pointer_rtx,
10385 -allocate)));
10386 }
10387 m->fs.sp_offset += allocate;
10389 if (r10_live && eax_live)
10390 {
10391 t = choose_baseaddr (m->fs.sp_offset - allocate);
10392 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10393 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10394 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10395 }
10396 else if (eax_live || r10_live)
10397 {
10398 t = choose_baseaddr (m->fs.sp_offset - allocate);
10399 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10400 }
10401 }
10402 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10404 /* If we haven't already set up the frame pointer, do so now. */
10405 if (frame_pointer_needed && !m->fs.fp_valid)
10406 {
10407 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10408 GEN_INT (frame.stack_pointer_offset
10409 - frame.hard_frame_pointer_offset));
10410 insn = emit_insn (insn);
10411 RTX_FRAME_RELATED_P (insn) = 1;
10412 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10414 if (m->fs.cfa_reg == stack_pointer_rtx)
10415 m->fs.cfa_reg = hard_frame_pointer_rtx;
10416 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10417 m->fs.fp_valid = true;
10418 }
10420 if (!int_registers_saved)
10421 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10422 if (!sse_registers_saved)
10423 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10425 pic_reg_used = false;
10426 if (pic_offset_table_rtx
10427 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10428 || crtl->profile || crtl->calls_eh_return))
10429 {
10430 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10432 if (alt_pic_reg_used != INVALID_REGNUM)
10433 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10435 pic_reg_used = true;
10436 }
10438 if (pic_reg_used)
10439 {
10440 if (TARGET_64BIT)
10441 {
10442 if (ix86_cmodel == CM_LARGE_PIC)
10443 {
10444 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10445 rtx label = gen_label_rtx ();
10446 emit_label (label);
10447 LABEL_PRESERVE_P (label) = 1;
10448 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10449 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10450 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10451 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10452 pic_offset_table_rtx, tmp_reg));
10453 }
10454 else
10455 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10456 }
10457 else
10458 {
10459 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10460 RTX_FRAME_RELATED_P (insn) = 1;
10461 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10462 }
10463 }
10465 /* In the pic_reg_used case, make sure that the got load isn't deleted
10466 when mcount needs it. Blockage to avoid call movement across mcount
10467 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10468 note. */
10469 if (crtl->profile && !flag_fentry && pic_reg_used)
10470 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10472 if (crtl->drap_reg && !crtl->stack_realign_needed)
10473 {
10474 /* vDRAP is set up, but after reload it turns out that stack
10475 realignment isn't necessary; emit prologue code to set up the
10476 DRAP without the stack realignment adjustment. */
10477 t = choose_baseaddr (0);
10478 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10479 }
10481 /* Prevent instructions from being scheduled into register save push
10482 sequence when access to the redzone area is done through frame pointer.
10483 The offset between the frame pointer and the stack pointer is calculated
10484 relative to the value of the stack pointer at the end of the function
10485 prologue, and moving instructions that access redzone area via frame
10486 pointer inside push sequence violates this assumption. */
10487 if (frame_pointer_needed && frame.red_zone_size)
10488 emit_insn (gen_memory_blockage ());
10490 /* Emit cld instruction if stringops are used in the function. */
10491 if (TARGET_CLD && ix86_current_function_needs_cld)
10492 emit_insn (gen_cld ());
10494 /* SEH requires that the prologue end within 256 bytes of the start of
10495 the function. Prevent instruction schedules that would extend that.
10496 Further, prevent alloca modifications to the stack pointer from being
10497 combined with prologue modifications. */
10498 if (TARGET_SEH)
10499 emit_insn (gen_prologue_use (stack_pointer_rtx));
10500 }
10502 /* Emit code to restore REG using a POP insn. */
10504 static void
10505 ix86_emit_restore_reg_using_pop (rtx reg)
10506 {
10507 struct machine_function *m = cfun->machine;
10508 rtx insn = emit_insn (gen_pop (reg));
10510 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10511 m->fs.sp_offset -= UNITS_PER_WORD;
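/* Illustration: a single "pop %reg" both reloads REG and adds
   UNITS_PER_WORD to the stack pointer, which is why sp_offset shrinks
   by one word here and why the CFA notes below may need adjusting.  */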
10513 if (m->fs.cfa_reg == crtl->drap_reg
10514 && REGNO (reg) == REGNO (crtl->drap_reg))
10515 {
10516 /* Previously we'd represented the CFA as an expression
10517 like *(%ebp - 8). We've just popped that value from
10518 the stack, which means we need to reset the CFA to
10519 the drap register. This will remain until we restore
10520 the stack pointer. */
10521 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10522 RTX_FRAME_RELATED_P (insn) = 1;
10524 /* This means that the DRAP register is valid for addressing too. */
10525 m->fs.drap_valid = true;
10526 return;
10527 }
10529 if (m->fs.cfa_reg == stack_pointer_rtx)
10530 {
10531 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10532 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10533 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10534 RTX_FRAME_RELATED_P (insn) = 1;
10536 m->fs.cfa_offset -= UNITS_PER_WORD;
10537 }
10539 /* When the frame pointer is the CFA, and we pop it, we are
10540 swapping back to the stack pointer as the CFA. This happens
10541 for stack frames that don't allocate other data, so we assume
10542 the stack pointer is now pointing at the return address, i.e.
10543 the function entry state, which makes the offset be 1 word. */
10544 if (reg == hard_frame_pointer_rtx)
10545 {
10546 m->fs.fp_valid = false;
10547 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10548 {
10549 m->fs.cfa_reg = stack_pointer_rtx;
10550 m->fs.cfa_offset -= UNITS_PER_WORD;
10552 add_reg_note (insn, REG_CFA_DEF_CFA,
10553 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10554 GEN_INT (m->fs.cfa_offset)));
10555 RTX_FRAME_RELATED_P (insn) = 1;
10556 }
10557 }
10558 }
10560 /* Emit code to restore saved registers using POP insns. */
10562 static void
10563 ix86_emit_restore_regs_using_pop (void)
10564 {
10565 unsigned int regno;
10567 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10568 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10569 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10570 }
10572 /* Emit code and notes for the LEAVE instruction. */
10574 static void
10575 ix86_emit_leave (void)
10576 {
10577 struct machine_function *m = cfun->machine;
10578 rtx insn = emit_insn (ix86_gen_leave ());
10580 ix86_add_queued_cfa_restore_notes (insn);
10582 gcc_assert (m->fs.fp_valid);
10583 m->fs.sp_valid = true;
10584 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10585 m->fs.fp_valid = false;
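/* "leave" is equivalent to "mov %ebp, %esp; pop %ebp", hence the state
   updates above: the stack pointer becomes valid again one word below
   the frame pointer's save slot, and the frame pointer is gone.  */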
10587 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10588 {
10589 m->fs.cfa_reg = stack_pointer_rtx;
10590 m->fs.cfa_offset = m->fs.sp_offset;
10592 add_reg_note (insn, REG_CFA_DEF_CFA,
10593 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10594 RTX_FRAME_RELATED_P (insn) = 1;
10595 }
10596 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10597 m->fs.fp_offset);
10598 }
10600 /* Emit code to restore saved registers using MOV insns.
10601 First register is restored from CFA - CFA_OFFSET. */
10602 static void
10603 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10604 bool maybe_eh_return)
10605 {
10606 struct machine_function *m = cfun->machine;
10607 unsigned int regno;
10609 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10610 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10611 {
10612 rtx reg = gen_rtx_REG (Pmode, regno);
10613 rtx insn, mem;
10615 mem = choose_baseaddr (cfa_offset);
10616 mem = gen_frame_mem (Pmode, mem);
10617 insn = emit_move_insn (reg, mem);
10619 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10620 {
10621 /* Previously we'd represented the CFA as an expression
10622 like *(%ebp - 8). We've just popped that value from
10623 the stack, which means we need to reset the CFA to
10624 the drap register. This will remain until we restore
10625 the stack pointer. */
10626 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10627 RTX_FRAME_RELATED_P (insn) = 1;
10629 /* This means that the DRAP register is valid for addressing. */
10630 m->fs.drap_valid = true;
10631 }
10632 else
10633 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10635 cfa_offset -= UNITS_PER_WORD;
10636 }
10637 }
10639 /* Emit code to restore saved registers using MOV insns.
10640 First register is restored from CFA - CFA_OFFSET. */
10641 static void
10642 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10643 bool maybe_eh_return)
10644 {
10645 unsigned int regno;
10647 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10648 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10649 {
10650 rtx reg = gen_rtx_REG (V4SFmode, regno);
10651 rtx mem;
10653 mem = choose_baseaddr (cfa_offset);
10654 mem = gen_rtx_MEM (V4SFmode, mem);
10655 set_mem_align (mem, 128);
10656 emit_move_insn (reg, mem);
10658 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10660 cfa_offset -= 16;
10661 }
10662 }
10664 /* Emit vzeroupper if needed. */
10666 static void
10667 ix86_maybe_emit_epilogue_vzeroupper (void)
10668 {
10669 if (TARGET_VZEROUPPER
10670 && !TREE_THIS_VOLATILE (cfun->decl)
10671 && !cfun->machine->caller_return_avx256_p)
10672 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10673 }
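/* Background note: vzeroupper clears the upper 128 bits of all ymm
   registers, avoiding the AVX-to-SSE transition penalty when the caller
   goes on to execute legacy SSE code; it is suppressed when the caller
   expects a 256-bit AVX value back.  */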
10675 /* Restore function stack, frame, and registers. */
10677 void
10678 ix86_expand_epilogue (int style)
10679 {
10680 struct machine_function *m = cfun->machine;
10681 struct machine_frame_state frame_state_save = m->fs;
10682 struct ix86_frame frame;
10683 bool restore_regs_via_mov;
10684 bool using_drap;
10686 ix86_finalize_stack_realign_flags ();
10687 ix86_compute_frame_layout (&frame);
10689 m->fs.sp_valid = (!frame_pointer_needed
10690 || (current_function_sp_is_unchanging
10691 && !stack_realign_fp));
10692 gcc_assert (!m->fs.sp_valid
10693 || m->fs.sp_offset == frame.stack_pointer_offset);
10695 /* The FP must be valid if the frame pointer is present. */
10696 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10697 gcc_assert (!m->fs.fp_valid
10698 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10700 /* We must have *some* valid pointer to the stack frame. */
10701 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10703 /* The DRAP is never valid at this point. */
10704 gcc_assert (!m->fs.drap_valid);
10706 /* See the comment about red zone and frame
10707 pointer usage in ix86_expand_prologue. */
10708 if (frame_pointer_needed && frame.red_zone_size)
10709 emit_insn (gen_memory_blockage ());
10711 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10712 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10714 /* Determine the CFA offset of the end of the red-zone. */
10715 m->fs.red_zone_offset = 0;
10716 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10717 {
10718 /* The red-zone begins below the return address. */
10719 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10721 /* When the register save area is in the aligned portion of
10722 the stack, determine the maximum runtime displacement that
10723 matches up with the aligned frame. */
10724 if (stack_realign_drap)
10725 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10726 + UNITS_PER_WORD);
10727 }
10729 /* Special care must be taken for the normal return case of a function
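/* Example with the usual parameters: RED_ZONE_SIZE is 128 on x86-64, so
   with 8-byte words the red zone is treated as ending 128 + 8 = 136
   bytes below the CFA here.  */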
10730 using eh_return: the eax and edx registers are marked as saved, but
10731 not restored along this path. Adjust the save location to match. */
10732 if (crtl->calls_eh_return && style != 2)
10733 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10735 /* EH_RETURN requires the use of moves to function properly. */
10736 if (crtl->calls_eh_return)
10737 restore_regs_via_mov = true;
10738 /* SEH requires the use of pops to identify the epilogue. */
10739 else if (TARGET_SEH)
10740 restore_regs_via_mov = false;
10741 /* If we're only restoring one register and sp is not valid, then
10742 use a move instruction to restore the register, since it's
10743 less work than reloading sp and popping the register. */
10744 else if (!m->fs.sp_valid && frame.nregs <= 1)
10745 restore_regs_via_mov = true;
10746 else if (TARGET_EPILOGUE_USING_MOVE
10747 && cfun->machine->use_fast_prologue_epilogue
10748 && (frame.nregs > 1
10749 || m->fs.sp_offset != frame.reg_save_offset))
10750 restore_regs_via_mov = true;
10751 else if (frame_pointer_needed
10752 && !frame.nregs
10753 && m->fs.sp_offset != frame.reg_save_offset)
10754 restore_regs_via_mov = true;
10755 else if (frame_pointer_needed
10756 && TARGET_USE_LEAVE
10757 && cfun->machine->use_fast_prologue_epilogue
10758 && frame.nregs == 1)
10759 restore_regs_via_mov = true;
10760 else
10761 restore_regs_via_mov = false;
10763 if (restore_regs_via_mov || frame.nsseregs)
10764 {
10765 /* Ensure that the entire register save area is addressable via
10766 the stack pointer, if we will restore via sp. */
10767 if (TARGET_64BIT
10768 && m->fs.sp_offset > 0x7fffffff
10769 && !(m->fs.fp_valid || m->fs.drap_valid)
10770 && (frame.nsseregs + frame.nregs) != 0)
10771 {
10772 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10773 GEN_INT (m->fs.sp_offset
10774 - frame.sse_reg_save_offset),
10775 style,
10776 m->fs.cfa_reg == stack_pointer_rtx);
10777 }
10778 }
10780 /* If there are any SSE registers to restore, then we have to do it
10781 via moves, since there's obviously no pop for SSE regs. */
10782 if (frame.nsseregs)
10783 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10784 style == 2);
10786 if (restore_regs_via_mov)
10787 {
10788 rtx t;
10790 if (frame.nregs)
10791 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10793 /* eh_return epilogues need %ecx added to the stack pointer. */
10794 if (style == 2)
10795 {
10796 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10798 /* Stack align doesn't work with eh_return. */
10799 gcc_assert (!stack_realign_drap);
10800 /* Neither do regparm nested functions. */
10801 gcc_assert (!ix86_static_chain_on_stack);
10803 if (frame_pointer_needed)
10804 {
10805 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10806 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10807 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10809 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10810 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10812 /* Note that we use SA as a temporary CFA, as the return
10813 address is at the proper place relative to it. We
10814 pretend this happens at the FP restore insn because
10815 prior to this insn the FP would be stored at the wrong
10816 offset relative to SA, and after this insn we have no
10817 other reasonable register to use for the CFA. We don't
10818 bother resetting the CFA to the SP for the duration of
10819 the return insn. */
10820 add_reg_note (insn, REG_CFA_DEF_CFA,
10821 plus_constant (sa, UNITS_PER_WORD));
10822 ix86_add_queued_cfa_restore_notes (insn);
10823 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10824 RTX_FRAME_RELATED_P (insn) = 1;
10826 m->fs.cfa_reg = sa;
10827 m->fs.cfa_offset = UNITS_PER_WORD;
10828 m->fs.fp_valid = false;
10830 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10831 const0_rtx, style, false);
10832 }
10833 else
10834 {
10835 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10836 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10837 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10838 ix86_add_queued_cfa_restore_notes (insn);
10840 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10841 if (m->fs.cfa_offset != UNITS_PER_WORD)
10842 {
10843 m->fs.cfa_offset = UNITS_PER_WORD;
10844 add_reg_note (insn, REG_CFA_DEF_CFA,
10845 plus_constant (stack_pointer_rtx,
10846 UNITS_PER_WORD));
10847 RTX_FRAME_RELATED_P (insn) = 1;
10848 }
10849 }
10850 m->fs.sp_offset = UNITS_PER_WORD;
10851 m->fs.sp_valid = true;
10852 }
10853 }
10854 else
10855 {
10856 /* SEH requires that the function end with (1) a stack adjustment
10857 if necessary, (2) a sequence of pops, and (3) a return or
10858 jump instruction. Prevent insns from the function body from
10859 being scheduled into this sequence. */
10860 if (TARGET_SEH)
10861 {
10862 /* Prevent a catch region from being adjacent to the standard
10863 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
10864 several other flags that would be interesting to test are
10865 set up yet. */
10866 if (flag_non_call_exceptions)
10867 emit_insn (gen_nops (const1_rtx));
10868 else
10869 emit_insn (gen_blockage ());
10870 }
10872 /* First step is to deallocate the stack frame so that we can
10873 pop the registers. Also do it on SEH target for very large
10874 frame as the emitted instructions aren't allowed by the ABI in
10875 epilogues. */
10876 if (!m->fs.sp_valid
10877 || (TARGET_SEH
10878 && (m->fs.sp_offset - frame.reg_save_offset
10879 >= SEH_MAX_FRAME_SIZE)))
10880 {
10881 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10882 GEN_INT (m->fs.fp_offset
10883 - frame.reg_save_offset),
10884 style, false);
10885 }
10886 else if (m->fs.sp_offset != frame.reg_save_offset)
10887 {
10888 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10889 GEN_INT (m->fs.sp_offset
10890 - frame.reg_save_offset),
10891 style,
10892 m->fs.cfa_reg == stack_pointer_rtx);
10893 }
10895 ix86_emit_restore_regs_using_pop ();
10896 }
10898 /* If we used a frame pointer and haven't already got rid of it,
10899 then pop it. */
10900 if (m->fs.fp_valid)
10901 {
10902 /* If the stack pointer is valid and pointing at the frame
10903 pointer store address, then we only need a pop. */
10904 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10905 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10906 /* Leave results in shorter dependency chains on CPUs that are
10907 able to grok it fast. */
10908 else if (TARGET_USE_LEAVE
10909 || optimize_function_for_size_p (cfun)
10910 || !cfun->machine->use_fast_prologue_epilogue)
10911 ix86_emit_leave ();
10912 else
10913 {
10914 pro_epilogue_adjust_stack (stack_pointer_rtx,
10915 hard_frame_pointer_rtx,
10916 const0_rtx, style, !using_drap);
10917 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10918 }
10919 }
10921 if (using_drap)
10922 {
10923 int param_ptr_offset = UNITS_PER_WORD;
10924 rtx insn;
10926 gcc_assert (stack_realign_drap);
10928 if (ix86_static_chain_on_stack)
10929 param_ptr_offset += UNITS_PER_WORD;
10930 if (!call_used_regs[REGNO (crtl->drap_reg)])
10931 param_ptr_offset += UNITS_PER_WORD;
10933 insn = emit_insn (gen_rtx_SET
10934 (VOIDmode, stack_pointer_rtx,
10935 gen_rtx_PLUS (Pmode,
10936 crtl->drap_reg,
10937 GEN_INT (-param_ptr_offset))));
10938 m->fs.cfa_reg = stack_pointer_rtx;
10939 m->fs.cfa_offset = param_ptr_offset;
10940 m->fs.sp_offset = param_ptr_offset;
10941 m->fs.realigned = false;
10943 add_reg_note (insn, REG_CFA_DEF_CFA,
10944 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10945 GEN_INT (param_ptr_offset)));
10946 RTX_FRAME_RELATED_P (insn) = 1;
10948 if (!call_used_regs[REGNO (crtl->drap_reg)])
10949 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10950 }
10952 /* At this point the stack pointer must be valid, and we must have
10953 restored all of the registers. We may not have deallocated the
10954 entire stack frame. We've delayed this until now because it may
10955 be possible to merge the local stack deallocation with the
10956 deallocation forced by ix86_static_chain_on_stack. */
10957 gcc_assert (m->fs.sp_valid);
10958 gcc_assert (!m->fs.fp_valid);
10959 gcc_assert (!m->fs.realigned);
10960 if (m->fs.sp_offset != UNITS_PER_WORD)
10961 {
10962 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10963 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10964 style, true);
10965 }
10966 else
10967 ix86_add_queued_cfa_restore_notes (get_last_insn ());
10969 /* Sibcall epilogues don't want a return instruction. */
10970 if (style == 0)
10971 {
10972 m->fs = frame_state_save;
10973 return;
10974 }
10976 /* Emit vzeroupper if needed. */
10977 ix86_maybe_emit_epilogue_vzeroupper ();
10979 if (crtl->args.pops_args && crtl->args.size)
10980 {
10981 rtx popc = GEN_INT (crtl->args.pops_args);
10983 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10984 address, do explicit add, and jump indirectly to the caller. */
10986 if (crtl->args.pops_args >= 65536)
10987 {
10988 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10989 rtx insn;
10991 /* There is no "pascal" calling convention in any 64bit ABI. */
10992 gcc_assert (!TARGET_64BIT);
10994 insn = emit_insn (gen_pop (ecx));
10995 m->fs.cfa_offset -= UNITS_PER_WORD;
10996 m->fs.sp_offset -= UNITS_PER_WORD;
10998 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10999 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11000 add_reg_note (insn, REG_CFA_REGISTER,
11001 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11002 RTX_FRAME_RELATED_P (insn) = 1;
11004 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11005 popc, -1, true);
11006 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11007 }
11008 else
11009 emit_jump_insn (gen_simple_return_pop_internal (popc));
11010 }
11011 else
11012 emit_jump_insn (gen_simple_return_internal ());
11014 /* Restore the state back to the state from the prologue,
11015 so that it's correct for the next epilogue. */
11016 m->fs = frame_state_save;
11017 }
11019 /* Reset from the function's potential modifications. */
11021 static void
11022 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11023 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11024 {
11025 if (pic_offset_table_rtx)
11026 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11027 #if TARGET_MACHO
11028 /* Mach-O doesn't support labels at the end of objects, so if
11029 it looks like we might want one, insert a NOP. */
11030 {
11031 rtx insn = get_last_insn ();
11032 rtx deleted_debug_label = NULL_RTX;
11033 while (insn
11034 && NOTE_P (insn)
11035 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11036 {
11037 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11038 notes only; instead set their CODE_LABEL_NUMBER to -1,
11039 otherwise there would be code generation differences
11040 between -g and -g0. */
11041 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11042 deleted_debug_label = insn;
11043 insn = PREV_INSN (insn);
11044 }
11045 if (insn
11046 && (LABEL_P (insn)
11047 || (NOTE_P (insn)
11048 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11049 fputs ("\tnop\n", file);
11050 else if (deleted_debug_label)
11051 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11052 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11053 CODE_LABEL_NUMBER (insn) = -1;
11054 }
11055 #endif
11056 }
11059 /* Return a scratch register to use in the split stack prologue. The
11060 split stack prologue is used for -fsplit-stack. It is the first
11061 instructions in the function, even before the regular prologue.
11062 The scratch register can be any caller-saved register which is not
11063 used for parameters or for the static chain. */
11065 static unsigned int
11066 split_stack_prologue_scratch_regno (void)
11067 {
11068 if (TARGET_64BIT)
11069 return R11_REG;
11070 else
11071 {
11072 bool is_fastcall;
11073 int regparm;
11075 is_fastcall = (lookup_attribute ("fastcall",
11076 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11077 != NULL);
11078 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11080 if (is_fastcall)
11081 {
11082 if (DECL_STATIC_CHAIN (cfun->decl))
11083 {
11084 sorry ("-fsplit-stack does not support fastcall with "
11085 "nested function");
11086 return INVALID_REGNUM;
11087 }
11088 return AX_REG;
11089 }
11090 else if (regparm < 3)
11091 {
11092 if (!DECL_STATIC_CHAIN (cfun->decl))
11093 return CX_REG;
11094 else
11095 {
11096 if (regparm >= 2)
11097 {
11098 sorry ("-fsplit-stack does not support 2 register "
11099 "parameters for a nested function");
11100 return INVALID_REGNUM;
11101 }
11102 return DX_REG;
11103 }
11104 }
11105 else
11106 {
11107 /* FIXME: We could make this work by pushing a register
11108 around the addition and comparison. */
11109 sorry ("-fsplit-stack does not support 3 register parameters");
11110 return INVALID_REGNUM;
11111 }
11112 }
11113 }
11115 /* A SYMBOL_REF for the function which allocates new stackspace for
11116 -fsplit-stack. */
11118 static GTY(()) rtx split_stack_fn;
11120 /* A SYMBOL_REF for the more stack function when using the large
11121 model. */
11123 static GTY(()) rtx split_stack_fn_large;
11125 /* Handle -fsplit-stack. These are the first instructions in the
11126 function, even before the regular prologue. */
11128 void
11129 ix86_expand_split_stack_prologue (void)
11130 {
11131 struct ix86_frame frame;
11132 HOST_WIDE_INT allocate;
11133 unsigned HOST_WIDE_INT args_size;
11134 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11135 rtx scratch_reg = NULL_RTX;
11136 rtx varargs_label = NULL_RTX;
11137 rtx fn;
11139 gcc_assert (flag_split_stack && reload_completed);
11141 ix86_finalize_stack_realign_flags ();
11142 ix86_compute_frame_layout (&frame);
11143 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11145 /* This is the label we will branch to if we have enough stack
11146 space. We expect the basic block reordering pass to reverse this
11147 branch if optimizing, so that we branch in the unlikely case. */
11148 label = gen_label_rtx ();
11150 /* We need to compare the stack pointer minus the frame size with
11151 the stack boundary in the TCB. The stack boundary always gives
11152 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11153 can compare directly. Otherwise we need to do an addition. */
11155 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11156 UNSPEC_STACK_CHECK);
11157 limit = gen_rtx_CONST (Pmode, limit);
11158 limit = gen_rtx_MEM (Pmode, limit);
11159 if (allocate < SPLIT_STACK_AVAILABLE)
11160 current = stack_pointer_rtx;
11161 else
11162 {
11163 unsigned int scratch_regno;
11164 rtx offset;
11166 /* We need a scratch register to hold the stack pointer minus
11167 the required frame size. Since this is the very start of the
11168 function, the scratch register can be any caller-saved
11169 register which is not used for parameters. */
11170 offset = GEN_INT (- allocate);
11171 scratch_regno = split_stack_prologue_scratch_regno ();
11172 if (scratch_regno == INVALID_REGNUM)
11173 return;
11174 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11175 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11177 /* We don't use ix86_gen_add3 in this case because it will
11178 want to split to lea, but when not optimizing the insn
11179 will not be split after this point. */
11180 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11181 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11182 offset)));
11183 }
11184 else
11185 {
11186 emit_move_insn (scratch_reg, offset);
11187 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11188 stack_pointer_rtx));
11189 }
11190 current = scratch_reg;
11191 }
11193 ix86_expand_branch (GEU, current, limit, label);
11194 jump_insn = get_last_insn ();
11195 JUMP_LABEL (jump_insn) = label;
11197 /* Mark the jump as very likely to be taken. */
11198 add_reg_note (jump_insn, REG_BR_PROB,
11199 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
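/* REG_BR_PROB_BASE is 10000, so this marks the branch as taken with
   probability (10000 - 10000/100) / 10000 = 99%.  */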
11201 if (split_stack_fn == NULL_RTX)
11202 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11203 fn = split_stack_fn;
11205 /* Get more stack space. We pass in the desired stack space and the
11206 size of the arguments to copy to the new stack. In 32-bit mode
11207 we push the parameters; __morestack will return on a new stack
11208 anyhow. In 64-bit mode we pass the parameters in r10 and
11210 allocate_rtx = GEN_INT (allocate);
11211 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11212 call_fusage = NULL_RTX;
11213 if (TARGET_64BIT)
11214 {
11215 rtx reg10, reg11;
11217 reg10 = gen_rtx_REG (Pmode, R10_REG);
11218 reg11 = gen_rtx_REG (Pmode, R11_REG);
11220 /* If this function uses a static chain, it will be in %r10.
11221 Preserve it across the call to __morestack. */
11222 if (DECL_STATIC_CHAIN (cfun->decl))
11223 {
11224 rtx rax;
11226 rax = gen_rtx_REG (Pmode, AX_REG);
11227 emit_move_insn (rax, reg10);
11228 use_reg (&call_fusage, rax);
11229 }
11231 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11232 {
11233 HOST_WIDE_INT argval;
11235 /* When using the large model we need to load the address
11236 into a register, and we've run out of registers. So we
11237 switch to a different calling convention, and we call a
11238 different function: __morestack_large. We pass the
11239 argument size in the upper 32 bits of r10 and pass the
11240 frame size in the lower 32 bits. */
11241 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11242 gcc_assert ((args_size & 0xffffffff) == args_size);
11244 if (split_stack_fn_large == NULL_RTX)
11245 split_stack_fn_large =
11246 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11248 if (ix86_cmodel == CM_LARGE_PIC)
11249 {
11250 rtx label, x;
11252 label = gen_label_rtx ();
11253 emit_label (label);
11254 LABEL_PRESERVE_P (label) = 1;
11255 emit_insn (gen_set_rip_rex64 (reg10, label));
11256 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11257 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11258 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11259 UNSPEC_GOT);
11260 x = gen_rtx_CONST (Pmode, x);
11261 emit_move_insn (reg11, x);
11262 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11263 x = gen_const_mem (Pmode, x);
11264 emit_move_insn (reg11, x);
11265 }
11266 else
11267 emit_move_insn (reg11, split_stack_fn_large);
11269 fn = reg11;
11271 argval = ((args_size << 16) << 16) + allocate;
11272 emit_move_insn (reg10, GEN_INT (argval));
11273 }
11274 else
11275 {
11276 emit_move_insn (reg10, allocate_rtx);
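/* Worked example for the large-model path above (illustrative values):
   with args_size == 24 and allocate == 4096, argval is
   (24 << 32) + 4096, i.e. the argument size travels in the upper half
   of %r10 and the frame size in the lower half.  The double shift
   "((args_size << 16) << 16)" presumably sidesteps shift-count
   complaints when HOST_WIDE_INT is only 32 bits wide.  */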
11277 emit_move_insn (reg11, GEN_INT (args_size));
11278 use_reg (&call_fusage, reg11);
11279 }
11281 use_reg (&call_fusage, reg10);
11282 }
11283 else
11284 {
11285 emit_insn (gen_push (GEN_INT (args_size)));
11286 emit_insn (gen_push (allocate_rtx));
11287 }
11288 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11289 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11290 NULL_RTX, false);
11291 add_function_usage_to (call_insn, call_fusage);
11293 /* In order to make call/return prediction work right, we now need
11294 to execute a return instruction. See
11295 libgcc/config/i386/morestack.S for the details on how this works.
11297 For flow purposes gcc must not see this as a return
11298 instruction--we need control flow to continue at the subsequent
11299 label. Therefore, we use an unspec. */
11300 gcc_assert (crtl->args.pops_args < 65536);
11301 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11303 /* If we are in 64-bit mode and this function uses a static chain,
11304 we saved %r10 in %rax before calling __morestack. */
11305 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11306 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11307 gen_rtx_REG (Pmode, AX_REG));
11309 /* If this function calls va_start, we need to store a pointer to
11310 the arguments on the old stack, because they may not have been
11311 all copied to the new stack. At this point the old stack can be
11312 found at the frame pointer value used by __morestack, because
11313 __morestack has set that up before calling back to us. Here we
11314 store that pointer in a scratch register, and in
11315 ix86_expand_prologue we store the scratch register in a stack
11316 slot. */
11317 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11318 {
11319 unsigned int scratch_regno;
11320 rtx frame_reg;
11321 int words;
11323 scratch_regno = split_stack_prologue_scratch_regno ();
11324 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11325 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11327 /* 64-bit:
11328 fp -> old fp
11329 return address within this function
11330 return address of caller of this function
11331 stack arguments
11332 So we add three words to get to the stack arguments.
11334 32-bit:
11335 fp -> old fp
11336 return address within this function
11337 first argument to __morestack
11338 second argument to __morestack
11339 return address of caller of this function
11340 stack arguments
11341 So we add five words to get to the stack arguments. */
11343 words = TARGET_64BIT ? 3 : 5;
11344 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11345 gen_rtx_PLUS (Pmode, frame_reg,
11346 GEN_INT (words * UNITS_PER_WORD))));
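/* Illustration for the 64-bit case: with 8-byte words scratch_reg ends
   up as fp + 3*8 = fp + 24, the address of the first stack argument in
   the old (pre-__morestack) frame sketched in the comment above.  */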
11348 varargs_label = gen_label_rtx ();
11349 emit_jump_insn (gen_jump (varargs_label));
11350 JUMP_LABEL (get_last_insn ()) = varargs_label;
11351 }
11355 emit_label (label);
11356 LABEL_NUSES (label) = 1;
11358 /* If this function calls va_start, we now have to set the scratch
11359 register for the case where we do not call __morestack. In this
11360 case we need to set it based on the stack pointer. */
11361 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11362 {
11363 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11364 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11365 GEN_INT (UNITS_PER_WORD))));
11367 emit_label (varargs_label);
11368 LABEL_NUSES (varargs_label) = 1;
11369 }
11370 }
11372 /* We may have to tell the dataflow pass that the split stack prologue
11373 is initializing a scratch register. */
11375 static void
11376 ix86_live_on_entry (bitmap regs)
11377 {
11378 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11379 {
11380 gcc_assert (flag_split_stack);
11381 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11382 }
11383 }
11385 /* Determine if OP is a suitable SUBREG RTX for an address. */
11387 static bool
11388 ix86_address_subreg_operand (rtx op)
11389 {
11390 enum machine_mode mode;
11392 if (!REG_P (op))
11393 return false;
11395 mode = GET_MODE (op);
11397 if (GET_MODE_CLASS (mode) != MODE_INT)
11398 return false;
11400 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11401 failures when the register is one word out of a two word structure. */
11402 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11403 return false;
11405 /* simplify_subreg does not handle the stack pointer. */
11406 if (REGNO (op) == STACK_POINTER_REGNUM)
11407 return false;
11409 /* Allow only SUBREGs of non-eliminable hard registers. */
11410 return register_no_elim_operand (op, mode);
11411 }
11413 /* Extract the parts of an RTL expression that is a valid memory address
11414 for an instruction. Return 0 if the structure of the address is
11415 grossly off. Return -1 if the address contains ASHIFT, so it is not
11416 strictly valid, but still used for computing length of lea instruction. */
11418 static int
11419 ix86_decompose_address (rtx addr, struct ix86_address *out)
11420 {
11421 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11422 rtx base_reg, index_reg;
11423 HOST_WIDE_INT scale = 1;
11424 rtx scale_rtx = NULL_RTX;
11425 rtx tmp;
11426 int retval = 1;
11427 enum ix86_address_seg seg = SEG_DEFAULT;
11429 /* Allow zero-extended SImode addresses,
11430 they will be emitted with addr32 prefix. */
11431 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11432 {
11433 if (GET_CODE (addr) == ZERO_EXTEND
11434 && GET_MODE (XEXP (addr, 0)) == SImode)
11435 {
11436 addr = XEXP (addr, 0);
11437 if (CONST_INT_P (addr))
11438 return 0;
11439 }
11440 else if (GET_CODE (addr) == AND
11441 && const_32bit_mask (XEXP (addr, 1), DImode))
11442 {
11443 addr = XEXP (addr, 0);
11445 /* Adjust SUBREGs. */
11446 if (GET_CODE (addr) == SUBREG
11447 && GET_MODE (SUBREG_REG (addr)) == SImode)
11448 {
11449 addr = SUBREG_REG (addr);
11450 if (CONST_INT_P (addr))
11451 return 0;
11452 }
11453 else if (GET_MODE (addr) == DImode)
11454 addr = gen_rtx_SUBREG (SImode, addr, 0);
11455 else if (GET_MODE (addr) != VOIDmode)
11456 return 0;
11457 }
11458 }
11460 /* Allow SImode subregs of DImode addresses,
11461 they will be emitted with addr32 prefix. */
11462 if (TARGET_64BIT && GET_MODE (addr) == SImode)
11463 {
11464 if (GET_CODE (addr) == SUBREG
11465 && GET_MODE (SUBREG_REG (addr)) == DImode)
11466 {
11467 addr = SUBREG_REG (addr);
11468 if (CONST_INT_P (addr))
11469 return 0;
11470 }
11471 }
11473 if (REG_P (addr))
11474 base = addr;
11475 else if (GET_CODE (addr) == SUBREG)
11476 {
11477 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11478 base = addr;
11479 else
11480 return 0;
11481 }
11482 else if (GET_CODE (addr) == PLUS)
11483 {
11484 rtx addends[4], op;
11485 int n = 0, i;
11487 op = addr;
11488 do
11489 {
11490 if (n >= 4)
11491 return 0;
11492 addends[n++] = XEXP (op, 1);
11493 op = XEXP (op, 0);
11494 }
11495 while (GET_CODE (op) == PLUS);
11496 if (n >= 4)
11497 return 0;
11498 addends[n++] = op;
11500 for (i = n - 1; i >= 0; --i)
11501 {
11502 op = addends[i];
11503 switch (GET_CODE (op))
11504 {
11505 case MULT:
11506 if (index)
11507 return 0;
11508 index = XEXP (op, 0);
11509 scale_rtx = XEXP (op, 1);
11510 break;
11512 case ASHIFT:
11513 if (index)
11514 return 0;
11515 index = XEXP (op, 0);
11516 tmp = XEXP (op, 1);
11517 if (!CONST_INT_P (tmp))
11518 return 0;
11519 scale = INTVAL (tmp);
11520 if ((unsigned HOST_WIDE_INT) scale > 3)
11521 return 0;
11522 scale = 1 << scale;
11523 break;
11525 case UNSPEC:
11526 if (XINT (op, 1) == UNSPEC_TP
11527 && TARGET_TLS_DIRECT_SEG_REFS
11528 && seg == SEG_DEFAULT)
11529 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11530 else
11531 return 0;
11532 break;
11534 case SUBREG:
11535 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11536 return 0;
11537 /* FALLTHRU */
11539 case REG:
11540 if (!base)
11541 base = op;
11542 else if (!index)
11543 index = op;
11544 else
11545 return 0;
11546 break;
11548 case CONST:
11549 case CONST_INT:
11550 case SYMBOL_REF:
11551 case LABEL_REF:
11552 if (disp)
11553 return 0;
11554 disp = op;
11555 break;
11557 default:
11558 return 0;
11559 }
11560 }
11561 }
11562 else if (GET_CODE (addr) == MULT)
11563 {
11564 index = XEXP (addr, 0); /* index*scale */
11565 scale_rtx = XEXP (addr, 1);
11566 }
11567 else if (GET_CODE (addr) == ASHIFT)
11568 {
11569 /* We're called for lea too, which implements ashift on occasion. */
11570 index = XEXP (addr, 0);
11571 tmp = XEXP (addr, 1);
11572 if (!CONST_INT_P (tmp))
11573 return 0;
11574 scale = INTVAL (tmp);
11575 if ((unsigned HOST_WIDE_INT) scale > 3)
11576 return 0;
11577 scale = 1 << scale;
11578 }
11580 else if (CONST_INT_P (addr))
11581 {
11582 if (!x86_64_immediate_operand (addr, VOIDmode))
11583 return 0;
11585 /* Constant addresses are sign extended to 64bit, so we have to
11586 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11587 if (TARGET_X32
11588 && val_signbit_known_set_p (SImode, INTVAL (addr)))
11589 return 0;
11591 disp = addr;
11592 }
11593 else
11594 disp = addr; /* displacement */
11596 if (index)
11597 {
11598 if (REG_P (index))
11599 ;
11600 else if (GET_CODE (index) == SUBREG
11601 && ix86_address_subreg_operand (SUBREG_REG (index)))
11602 ;
11603 else
11604 return 0;
11605 }
11607 /* Extract the integral value of scale. */
11608 if (scale_rtx)
11609 {
11610 if (!CONST_INT_P (scale_rtx))
11611 return 0;
11612 scale = INTVAL (scale_rtx);
11613 }
11615 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
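/* Example decomposition (hypothetical RTL): the address
       (plus (mult (reg %eax) (const_int 4)) (plus (reg %ebx) (const_int 8)))
   yields base = %ebx, index = %eax, scale = 4, disp = 8, matching the
   hardware [base + index*scale + disp] addressing form.  */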
11616 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11618 /* Avoid useless 0 displacement. */
11619 if (disp == const0_rtx && (base || index))
11620 disp = NULL_RTX;
11622 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11623 if (base_reg && index_reg && scale == 1
11624 && (index_reg == arg_pointer_rtx
11625 || index_reg == frame_pointer_rtx
11626 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11627 {
11629 tmp = base, base = index, index = tmp;
11630 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11631 }
11633 /* Special case: %ebp cannot be encoded as a base without a displacement.
11634 Similarly %r13. */
11635 if (!disp
11636 && base_reg
11637 && (base_reg == hard_frame_pointer_rtx
11638 || base_reg == frame_pointer_rtx
11639 || base_reg == arg_pointer_rtx
11640 || (REG_P (base_reg)
11641 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11642 || REGNO (base_reg) == R13_REG))))
11643 disp = const0_rtx;
11645 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11646 Avoid this by transforming to [%esi+0].
11647 Reload calls address legitimization without cfun defined, so we need
11648 to test cfun for being non-NULL. */
11649 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11650 && base_reg && !index_reg && !disp
11651 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11652 disp = const0_rtx;
11654 /* Special case: encode reg+reg instead of reg*2. */
11655 if (!base && index && scale == 2)
11656 base = index, base_reg = index_reg, scale = 1;
11658 /* Special case: scaling cannot be encoded without base or displacement. */
11659 if (!base && !disp && index && scale != 1)
11660 return 0;
11662 out->base = base;
11663 out->index = index;
11664 out->disp = disp;
11665 out->scale = scale;
11666 out->seg = seg;
11668 return retval;
11669 }
11671 /* Return cost of the memory address x.
11672 For i386, it is better to use a complex address than let gcc copy
11673 the address into a reg and make a new pseudo. But not if the address
11674 requires two regs - that would mean more pseudos with longer
11675 lifetimes. */
11676 static int
11677 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11678 {
11679 struct ix86_address parts;
11680 int cost = 1;
11681 int ok = ix86_decompose_address (x, &parts);
11683 gcc_assert (ok);
11685 if (parts.base && GET_CODE (parts.base) == SUBREG)
11686 parts.base = SUBREG_REG (parts.base);
11687 if (parts.index && GET_CODE (parts.index) == SUBREG)
11688 parts.index = SUBREG_REG (parts.index);
11690 /* Attempt to minimize the number of registers in the address. */
11691 if ((parts.base
11692 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11693 || (parts.index
11694 && (!REG_P (parts.index)
11695 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11696 cost++;
11698 if (parts.base
11699 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11700 && parts.index
11701 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11702 && parts.base != parts.index)
11703 cost++;
11705 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11706 since its predecode logic can't detect the length of instructions
11707 and it degenerates to vector decoded. Increase the cost of such
11708 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11709 to split such addresses or even refuse such addresses at all.
11711 The following addressing modes are affected:
11712 [base+scale*index]
11713 [scale*index+disp]
11714 [base+index]
11716 The first and last case may be avoidable by explicitly coding the zero in
11717 the memory address, but I don't have an AMD-K6 machine handy to check this
11718 theory. */
11720 if (TARGET_K6
11721 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11722 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11723 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11724 cost += 10;
11726 return cost;
11727 }
11729 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11730 this is used to form addresses to local data when -fPIC is in
11731 effect. */
11733 static bool
11734 darwin_local_data_pic (rtx disp)
11735 {
11736 return (GET_CODE (disp) == UNSPEC
11737 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11738 }
11740 /* Determine if a given RTX is a valid constant. We already know this
11741 satisfies CONSTANT_P. */
11743 static bool
11744 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11745 {
11746 switch (GET_CODE (x))
11747 {
11748 case CONST:
11749 x = XEXP (x, 0);
11751 if (GET_CODE (x) == PLUS)
11752 {
11753 if (!CONST_INT_P (XEXP (x, 1)))
11754 return false;
11755 x = XEXP (x, 0);
11756 }
11758 if (TARGET_MACHO && darwin_local_data_pic (x))
11759 return true;
11761 /* Only some unspecs are valid as "constants". */
11762 if (GET_CODE (x) == UNSPEC)
11763 switch (XINT (x, 1))
11764 {
11765 case UNSPEC_GOT:
11766 case UNSPEC_GOTOFF:
11767 case UNSPEC_PLTOFF:
11768 return TARGET_64BIT;
11769 case UNSPEC_TPOFF:
11770 case UNSPEC_NTPOFF:
11771 x = XVECEXP (x, 0, 0);
11772 return (GET_CODE (x) == SYMBOL_REF
11773 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11774 case UNSPEC_DTPOFF:
11775 x = XVECEXP (x, 0, 0);
11776 return (GET_CODE (x) == SYMBOL_REF
11777 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11778 default:
11779 return false;
11780 }
11782 /* We must have drilled down to a symbol. */
11783 if (GET_CODE (x) == LABEL_REF)
11784 return true;
11785 if (GET_CODE (x) != SYMBOL_REF)
11786 return false;
11787 /* FALLTHRU */
11789 case SYMBOL_REF:
11790 /* TLS symbols are never valid. */
11791 if (SYMBOL_REF_TLS_MODEL (x))
11792 return false;
11794 /* DLLIMPORT symbols are never valid. */
11795 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11796 && SYMBOL_REF_DLLIMPORT_P (x))
11797 return false;
11799 #if TARGET_MACHO
11800 /* mdynamic-no-pic */
11801 if (MACHO_DYNAMIC_NO_PIC_P)
11802 return machopic_symbol_defined_p (x);
11803 #endif
11804 break;
11806 case CONST_DOUBLE:
11807 if (GET_MODE (x) == TImode
11808 && x != CONST0_RTX (TImode)
11809 && !TARGET_64BIT)
11810 return false;
11811 break;
11813 case CONST_VECTOR:
11814 if (!standard_sse_constant_p (x))
11815 return false;
11817 default:
11818 break;
11819 }
11821 /* Otherwise we handle everything else in the move patterns. */
11822 return true;
11823 }
11825 /* Determine if it's legal to put X into the constant pool. This
11826 is not possible for the address of thread-local symbols, which
11827 is checked above. */
11829 static bool
11830 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11831 {
11832 /* We can always put integral constants and vectors in memory. */
11833 switch (GET_CODE (x))
11834 {
11835 case CONST_INT:
11836 case CONST_DOUBLE:
11837 case CONST_VECTOR:
11838 return false;
11840 default:
11841 break;
11842 }
11843 return !ix86_legitimate_constant_p (mode, x);
11844 }
11847 /* Nonzero if the constant value X is a legitimate general operand
11848 when generating PIC code. It is given that flag_pic is on and
11849 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11851 bool
11852 legitimate_pic_operand_p (rtx x)
11853 {
11854 rtx inner;
11856 switch (GET_CODE (x))
11857 {
11858 case CONST:
11859 inner = XEXP (x, 0);
11860 if (GET_CODE (inner) == PLUS
11861 && CONST_INT_P (XEXP (inner, 1)))
11862 inner = XEXP (inner, 0);
11864 /* Only some unspecs are valid as "constants". */
11865 if (GET_CODE (inner) == UNSPEC)
11866 switch (XINT (inner, 1))
11867 {
11868 case UNSPEC_GOT:
11869 case UNSPEC_GOTOFF:
11870 case UNSPEC_PLTOFF:
11871 return TARGET_64BIT;
11872 case UNSPEC_TPOFF:
11873 x = XVECEXP (inner, 0, 0);
11874 return (GET_CODE (x) == SYMBOL_REF
11875 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11876 case UNSPEC_MACHOPIC_OFFSET:
11877 return legitimate_pic_address_disp_p (x);
11878 default:
11879 return false;
11880 }
11881 /* FALLTHRU */
11883 case SYMBOL_REF:
11884 case LABEL_REF:
11885 return legitimate_pic_address_disp_p (x);
11887 default:
11888 return true;
11889 }
11890 }
11892 /* Determine if a given CONST RTX is a valid memory displacement
11893 in PIC mode. */
11895 bool
11896 legitimate_pic_address_disp_p (rtx disp)
11897 {
11898 bool saw_plus;
11900 /* In 64bit mode we can allow direct addresses of symbols and labels
11901 when they are not dynamic symbols. */
11902 if (TARGET_64BIT)
11903 {
11904 rtx op0 = disp, op1;
11906 switch (GET_CODE (disp))
11907 {
11908 case LABEL_REF:
11909 return true;
11911 case CONST:
11912 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11913 break;
11914 op0 = XEXP (XEXP (disp, 0), 0);
11915 op1 = XEXP (XEXP (disp, 0), 1);
11916 if (!CONST_INT_P (op1)
11917 || INTVAL (op1) >= 16*1024*1024
11918 || INTVAL (op1) < -16*1024*1024)
11919 break;
11920 if (GET_CODE (op0) == LABEL_REF)
11921 return true;
11922 if (GET_CODE (op0) == CONST
11923 && GET_CODE (XEXP (op0, 0)) == UNSPEC
11924 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11925 return true;
11926 if (GET_CODE (op0) == UNSPEC
11927 && XINT (op0, 1) == UNSPEC_PCREL)
11928 return true;
11929 if (GET_CODE (op0) != SYMBOL_REF)
11930 break;
11931 /* FALLTHRU */
11933 case SYMBOL_REF:
11934 /* TLS references should always be enclosed in UNSPEC. */
11935 if (SYMBOL_REF_TLS_MODEL (op0))
11936 return false;
11937 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11938 && ix86_cmodel != CM_LARGE_PIC)
11939 return true;
11940 break;
11942 default:
11943 break;
11944 }
11945 }
11946 if (GET_CODE (disp) != CONST)
11947 return false;
11948 disp = XEXP (disp, 0);
11950 if (TARGET_64BIT)
11951 {
11952 /* It is unsafe to allow PLUS expressions; this limits the allowed
11953 distance of GOT table references. We should not need these anyway. */
11954 if (GET_CODE (disp) != UNSPEC
11955 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11956 && XINT (disp, 1) != UNSPEC_GOTOFF
11957 && XINT (disp, 1) != UNSPEC_PCREL
11958 && XINT (disp, 1) != UNSPEC_PLTOFF))
11959 return false;
11961 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11962 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11963 return false;
11964 return true;
11965 }
11967 saw_plus = false;
11968 if (GET_CODE (disp) == PLUS)
11969 {
11970 if (!CONST_INT_P (XEXP (disp, 1)))
11971 return false;
11972 disp = XEXP (disp, 0);
11973 saw_plus = true;
11974 }
11976 if (TARGET_MACHO && darwin_local_data_pic (disp))
11977 return true;
11979 if (GET_CODE (disp) != UNSPEC)
11980 return false;
11982 switch (XINT (disp, 1))
11983 {
11984 case UNSPEC_GOT:
11985 if (saw_plus)
11986 return false;
11987 /* We need to check for both symbols and labels because VxWorks loads
11988 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11989 details. */
11990 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11991 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11992 case UNSPEC_GOTOFF:
11993 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11994 While the ABI also specifies a 32bit relocation, we don't produce
11995 it in the small PIC model at all. */
11996 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11997 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11998 && !TARGET_64BIT)
11999 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12000 return false;
12001 case UNSPEC_GOTTPOFF:
12002 case UNSPEC_GOTNTPOFF:
12003 case UNSPEC_INDNTPOFF:
12004 if (saw_plus)
12005 return false;
12006 disp = XVECEXP (disp, 0, 0);
12007 return (GET_CODE (disp) == SYMBOL_REF
12008 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12009 case UNSPEC_NTPOFF:
12010 disp = XVECEXP (disp, 0, 0);
12011 return (GET_CODE (disp) == SYMBOL_REF
12012 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12013 case UNSPEC_DTPOFF:
12014 disp = XVECEXP (disp, 0, 0);
12015 return (GET_CODE (disp) == SYMBOL_REF
12016 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12017 }
12019 return false;
12020 }
12022 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12023 replace the input X, or the original X if no replacement is called for.
12024 The output parameter *WIN is 1 if the calling macro should goto WIN,
12025 0 if it should not. */
12027 rtx
12028 ix86_legitimize_reload_address (rtx x,
12029 enum machine_mode mode ATTRIBUTE_UNUSED,
12030 int opnum, int type,
12031 int ind_levels ATTRIBUTE_UNUSED)
12032 {
12033 /* Reload can generate:
12035 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12036 (reg:DI 97))
12037 (reg:DI 2 cx))
12039 This RTX is rejected from ix86_legitimate_address_p due to
12040 non-strictness of base register 97. Following this rejection,
12041 reload pushes all three components into separate registers,
12042 creating invalid memory address RTX.
12044 Following code reloads only the invalid part of the
12045 memory address RTX. */
12047 if (GET_CODE (x) == PLUS
12048 && REG_P (XEXP (x, 1))
12049 && GET_CODE (XEXP (x, 0)) == PLUS
12050 && REG_P (XEXP (XEXP (x, 0), 1)))
12051 {
12052 rtx base, index;
12053 bool something_reloaded = false;
12055 base = XEXP (XEXP (x, 0), 1);
12056 if (!REG_OK_FOR_BASE_STRICT_P (base))
12057 {
12058 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12059 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12060 opnum, (enum reload_type) type);
12061 something_reloaded = true;
12062 }
12064 index = XEXP (x, 1);
12065 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12066 {
12067 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12068 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12069 opnum, (enum reload_type) type);
12070 something_reloaded = true;
12071 }
12073 gcc_assert (something_reloaded);
12074 return x;
12075 }
12077 return NULL_RTX;
12078 }
12080 /* Recognizes RTL expressions that are valid memory addresses for an
12081 instruction. The MODE argument is the machine mode for the MEM
12082 expression that wants to use this address.
12084 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12085 convert common non-canonical forms to canonical form so that they will
12086 be recognized. */
12088 static bool
12089 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12090 rtx addr, bool strict)
12091 {
12092 struct ix86_address parts;
12093 rtx base, index, disp;
12094 HOST_WIDE_INT scale;
12096 if (ix86_decompose_address (addr, &parts) <= 0)
12097 /* Decomposition failed. */
12098 return false;
12100 base = parts.base;
12101 index = parts.index;
12102 disp = parts.disp;
12103 scale = parts.scale;
12105 /* Validate base register. */
12106 if (base)
12107 {
12108 rtx reg;
12110 if (REG_P (base))
12111 reg = base;
12112 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
12113 reg = SUBREG_REG (base);
12114 else
12115 /* Base is not a register. */
12116 return false;
12118 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
12119 return false;
12121 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12122 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12123 /* Base is not valid. */
12124 return false;
12125 }
12127 /* Validate index register. */
12128 if (index)
12129 {
12130 rtx reg;
12132 if (REG_P (index))
12133 reg = index;
12134 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
12135 reg = SUBREG_REG (index);
12136 else
12137 /* Index is not a register. */
12138 return false;
12140 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
12141 return false;
12143 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12144 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12145 /* Index is not valid. */
12146 return false;
12147 }
12149 /* Index and base should have the same mode. */
12150 if (base && index
12151 && GET_MODE (base) != GET_MODE (index))
12152 return false;
12154 /* Validate scale factor. */
12155 if (scale != 1)
12156 {
12157 if (!index)
12158 /* Scale without index. */
12159 return false;
12161 if (scale != 2 && scale != 4 && scale != 8)
12162 /* Scale is not a valid multiplier. */
12163 return false;
12164 }
12166 /* Validate displacement. */
12167 if (disp)
12168 {
12169 if (GET_CODE (disp) == CONST
12170 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12171 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12172 switch (XINT (XEXP (disp, 0), 1))
12173 {
12174 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12175 used. While the ABI also specifies 32bit relocations, we don't
12176 produce them at all and use IP relative instead. */
12177 case UNSPEC_GOT:
12178 case UNSPEC_GOTOFF:
12179 gcc_assert (flag_pic);
12180 if (!TARGET_64BIT)
12181 goto is_legitimate_pic;
12183 /* 64bit address unspec. */
12184 return false;
12186 case UNSPEC_GOTPCREL:
12187 case UNSPEC_PCREL:
12188 gcc_assert (flag_pic);
12189 goto is_legitimate_pic;
12191 case UNSPEC_GOTTPOFF:
12192 case UNSPEC_GOTNTPOFF:
12193 case UNSPEC_INDNTPOFF:
12194 case UNSPEC_NTPOFF:
12195 case UNSPEC_DTPOFF:
12196 break;
12198 case UNSPEC_STACK_CHECK:
12199 gcc_assert (flag_split_stack);
12200 break;
12202 default:
12203 /* Invalid address unspec. */
12204 return false;
12205 }
12207 else if (SYMBOLIC_CONST (disp)
12208 && (flag_pic
12209 || (TARGET_MACHO
12210 #if TARGET_MACHO
12211 && MACHOPIC_INDIRECT
12212 && !machopic_operand_p (disp)
12213 #endif
12214 )))
12215 {
12217 is_legitimate_pic:
12218 if (TARGET_64BIT && (index || base))
12219 {
12220 /* foo@dtpoff(%rX) is ok. */
12221 if (GET_CODE (disp) != CONST
12222 || GET_CODE (XEXP (disp, 0)) != PLUS
12223 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12224 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12225 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12226 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12227 /* Non-constant pic memory reference. */
12228 return false;
12229 }
12230 else if ((!TARGET_MACHO || flag_pic)
12231 && ! legitimate_pic_address_disp_p (disp))
12232 /* Displacement is an invalid pic construct. */
12233 return false;
12234 #if TARGET_MACHO
12235 else if (MACHO_DYNAMIC_NO_PIC_P
12236 && !ix86_legitimate_constant_p (Pmode, disp))
12237 /* Displacement must be referenced via non_lazy_pointer. */
12238 return false;
12239 #endif
12241 /* This code used to verify that a symbolic pic displacement
12242 includes the pic_offset_table_rtx register.
12244 While this is a good idea, unfortunately these constructs may
12245 be created by the "adds using lea" optimization for incorrect
12246 code like:
12248 int a;
12249 int foo (int i)
12250 {
12251 return *(&a + i);
12252 }
12254 This code is nonsensical, but results in addressing
12255 GOT table with pic_offset_table_rtx base. We can't
12256 just refuse it easily, since it gets matched by
12257 "addsi3" pattern, that later gets split to lea in the
12258 case output register differs from input. While this
12259 can be handled by separate addsi pattern for this case
12260 that never results in lea, this seems to be easier and
12261 correct fix for crash to disable this test. */
12263 else if (GET_CODE (disp) != LABEL_REF
12264 && !CONST_INT_P (disp)
12265 && (GET_CODE (disp) != CONST
12266 || !ix86_legitimate_constant_p (Pmode, disp))
12267 && (GET_CODE (disp) != SYMBOL_REF
12268 || !ix86_legitimate_constant_p (Pmode, disp)))
	/* Displacement is not constant.  */
	return false;
12271 else if (TARGET_64BIT
12272 && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
  /* Everything looks valid.  */
  return true;
12281 /* Determine if a given RTX is a valid constant address. */
12284 constant_address_p (rtx x)
12286 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12289 /* Return a unique alias set for the GOT. */
12291 static alias_set_type
12292 ix86_GOT_alias_set (void)
12294 static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
12300 /* Return a legitimate reference for ORIG (an address) using the
12301 register REG. If REG is 0, a new pseudo is generated.
12303 There are two types of references that must be handled:
12305 1. Global data references must load the address from the GOT, via
12306 the PIC reg. An insn is emitted to do this load, and the reg is
12309 2. Static data references, constant pool addresses, and code labels
12310 compute the address as an offset from the GOT, whose base is in
12311 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12312 differentiate them from global data objects. The returned
12313 address is the PIC reg + an unspec constant.
12315 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12316 reg also appears in the address. */
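
/* An illustrative sketch (not part of the original comments): on 32-bit
   ELF targets a global symbol FOO typically becomes the memory reference

       (mem (plus (reg:SI ebx) (const (unspec [FOO] UNSPEC_GOT))))

   i.e. a load from FOO@GOT(%ebx), while a local symbol BAR becomes the
   direct sum

       (plus (reg:SI ebx) (const (unspec [BAR] UNSPEC_GOTOFF)))

   i.e. the address BAR@GOTOFF(%ebx), with no memory load needed.  */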
12319 legitimize_pic_address (rtx orig, rtx reg)
12322 rtx new_rtx = orig;
12326 if (TARGET_MACHO && !TARGET_64BIT)
12329 reg = gen_reg_rtx (Pmode);
12330 /* Use the generic Mach-O PIC machinery. */
12331 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12335 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12337 else if (TARGET_64BIT
12338 && ix86_cmodel != CM_SMALL_PIC
12339 && gotoff_operand (addr, Pmode))
12342 /* This symbol may be referenced via a displacement from the PIC
12343 base address (@GOTOFF). */
12345 if (reload_in_progress)
12346 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12347 if (GET_CODE (addr) == CONST)
12348 addr = XEXP (addr, 0);
12349 if (GET_CODE (addr) == PLUS)
12351 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12353 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12356 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12357 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12359 tmpreg = gen_reg_rtx (Pmode);
12362 emit_move_insn (tmpreg, new_rtx);
12366 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12367 tmpreg, 1, OPTAB_DIRECT);
	  else
	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12372 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12374 /* This symbol may be referenced via a displacement from the PIC
12375 base address (@GOTOFF). */
12377 if (reload_in_progress)
12378 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12379 if (GET_CODE (addr) == CONST)
12380 addr = XEXP (addr, 0);
12381 if (GET_CODE (addr) == PLUS)
12383 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12385 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12388 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12389 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12390 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12394 emit_move_insn (reg, new_rtx);
12398 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12399 /* We can't use @GOTOFF for text labels on VxWorks;
12400 see gotoff_operand. */
12401 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12403 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12405 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12406 return legitimize_dllimport_symbol (addr, true);
12407 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12408 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12409 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12411 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12412 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
      /* For x64 PE-COFF there is no GOT table, so we use the address
	 directly.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12420 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12421 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12424 reg = gen_reg_rtx (Pmode);
12425 emit_move_insn (reg, new_rtx);
12428 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12430 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12431 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12432 new_rtx = gen_const_mem (Pmode, new_rtx);
12433 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12436 reg = gen_reg_rtx (Pmode);
	      /* Use gen_movsi directly; otherwise the address is loaded
		 into a register for CSE.  We don't want to CSE these
		 addresses; instead we CSE addresses from the GOT table,
		 so skip this.  */
12440 emit_insn (gen_movsi (reg, new_rtx));
12445 /* This symbol must be referenced via a load from the
12446 Global Offset Table (@GOT). */
12448 if (reload_in_progress)
12449 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12450 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12451 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12453 new_rtx = force_reg (Pmode, new_rtx);
12454 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12455 new_rtx = gen_const_mem (Pmode, new_rtx);
12456 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12459 reg = gen_reg_rtx (Pmode);
12460 emit_move_insn (reg, new_rtx);
12466 if (CONST_INT_P (addr)
12467 && !x86_64_immediate_operand (addr, VOIDmode))
12471 emit_move_insn (reg, addr);
12475 new_rtx = force_reg (Pmode, addr);
12477 else if (GET_CODE (addr) == CONST)
12479 addr = XEXP (addr, 0);
12481 /* We must match stuff we generate before. Assume the only
12482 unspecs that can get here are ours. Not that we could do
12483 anything with them anyway.... */
12484 if (GET_CODE (addr) == UNSPEC
12485 || (GET_CODE (addr) == PLUS
12486 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12488 gcc_assert (GET_CODE (addr) == PLUS);
12490 if (GET_CODE (addr) == PLUS)
12492 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12494 /* Check first to see if this is a constant offset from a @GOTOFF
12495 symbol reference. */
12496 if (gotoff_operand (op0, Pmode)
12497 && CONST_INT_P (op1))
12501 if (reload_in_progress)
12502 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12503 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12505 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12506 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12507 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12511 emit_move_insn (reg, new_rtx);
12517 if (INTVAL (op1) < -16*1024*1024
12518 || INTVAL (op1) >= 16*1024*1024)
12520 if (!x86_64_immediate_operand (op1, Pmode))
12521 op1 = force_reg (Pmode, op1);
12522 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12528 base = legitimize_pic_address (XEXP (addr, 0), reg);
12529 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12530 base == reg ? NULL_RTX : reg);
12532 if (CONST_INT_P (new_rtx))
12533 new_rtx = plus_constant (base, INTVAL (new_rtx));
12536 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12538 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12539 new_rtx = XEXP (new_rtx, 1);
12541 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12549 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12552 get_thread_pointer (bool to_reg)
12554 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12556 if (GET_MODE (tp) != Pmode)
12557 tp = convert_to_mode (Pmode, tp, 1);
12560 tp = copy_addr_to_reg (tp);
12565 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12567 static GTY(()) rtx ix86_tls_symbol;
12570 ix86_tls_get_addr (void)
12572 if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");
12578 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12581 return ix86_tls_symbol;
12584 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12586 static GTY(()) rtx ix86_tls_module_base_symbol;
12589 ix86_tls_module_base (void)
12591 if (!ix86_tls_module_base_symbol)
12593 ix86_tls_module_base_symbol
12594 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12596 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12597 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12600 return ix86_tls_module_base_symbol;
12603 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12604 false if we expect this to be used for a memory address and true if
12605 we expect to load the address into a register. */
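
/* A rough illustration of the code emitted (a sketch following the usual
   GNU TLS conventions, not literal output of this function): for the
   local-exec model on 32-bit targets a variable x is addressed as

       movl %gs:0, %eax           # load the thread pointer
       leal x@ntpoff(%eax), %eax  # add x's constant offset

   while the global-dynamic model instead calls ___tls_get_addr and uses
   its return value as the address.  */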
12608 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12610 rtx dest, base, off;
12611 rtx pic = NULL_RTX, tp = NULL_RTX;
12616 case TLS_MODEL_GLOBAL_DYNAMIC:
12617 dest = gen_reg_rtx (Pmode);
12622 pic = pic_offset_table_rtx;
12625 pic = gen_reg_rtx (Pmode);
12626 emit_insn (gen_set_got (pic));
12630 if (TARGET_GNU2_TLS)
12633 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12635 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12637 tp = get_thread_pointer (true);
12638 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12640 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12644 rtx caddr = ix86_tls_get_addr ();
12648 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12651 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12652 insns = get_insns ();
12655 RTL_CONST_CALL_P (insns) = 1;
12656 emit_libcall_block (insns, dest, rax, x);
12659 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12663 case TLS_MODEL_LOCAL_DYNAMIC:
12664 base = gen_reg_rtx (Pmode);
12669 pic = pic_offset_table_rtx;
12672 pic = gen_reg_rtx (Pmode);
12673 emit_insn (gen_set_got (pic));
12677 if (TARGET_GNU2_TLS)
12679 rtx tmp = ix86_tls_module_base ();
12682 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12684 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12686 tp = get_thread_pointer (true);
12687 set_unique_reg_note (get_last_insn (), REG_EQUAL,
12688 gen_rtx_MINUS (Pmode, tmp, tp));
12692 rtx caddr = ix86_tls_get_addr ();
12696 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12699 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12700 insns = get_insns ();
12703 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12704 share the LD_BASE result with other LD model accesses. */
12705 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12706 UNSPEC_TLS_LD_BASE);
12708 RTL_CONST_CALL_P (insns) = 1;
12709 emit_libcall_block (insns, base, rax, eqv);
12712 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12715 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12716 off = gen_rtx_CONST (Pmode, off);
12718 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12720 if (TARGET_GNU2_TLS)
12722 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12724 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12728 case TLS_MODEL_INITIAL_EXEC:
12731 if (TARGET_SUN_TLS)
12733 /* The Sun linker took the AMD64 TLS spec literally
12734 and can only handle %rax as destination of the
12735 initial executable code sequence. */
12737 dest = gen_reg_rtx (Pmode);
12738 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12743 type = UNSPEC_GOTNTPOFF;
12747 if (reload_in_progress)
12748 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12749 pic = pic_offset_table_rtx;
12750 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12752 else if (!TARGET_ANY_GNU_TLS)
12754 pic = gen_reg_rtx (Pmode);
12755 emit_insn (gen_set_got (pic));
12756 type = UNSPEC_GOTTPOFF;
12761 type = UNSPEC_INDNTPOFF;
12764 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12765 off = gen_rtx_CONST (Pmode, off);
12767 off = gen_rtx_PLUS (Pmode, pic, off);
12768 off = gen_const_mem (Pmode, off);
12769 set_mem_alias_set (off, ix86_GOT_alias_set ());
12771 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12773 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12774 off = force_reg (Pmode, off);
12775 return gen_rtx_PLUS (Pmode, base, off);
12779 base = get_thread_pointer (true);
12780 dest = gen_reg_rtx (Pmode);
12781 emit_insn (gen_subsi3 (dest, base, off));
12785 case TLS_MODEL_LOCAL_EXEC:
12786 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12787 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12788 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12789 off = gen_rtx_CONST (Pmode, off);
12791 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12793 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12794 return gen_rtx_PLUS (Pmode, base, off);
12798 base = get_thread_pointer (true);
12799 dest = gen_reg_rtx (Pmode);
12800 emit_insn (gen_subsi3 (dest, base, off));
12805 gcc_unreachable ();
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL.  */
12814 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12815 htab_t dllimport_map;
12818 get_dllimport_decl (tree decl)
12820 struct tree_map *h, in;
12823 const char *prefix;
12824 size_t namelen, prefixlen;
12829 if (!dllimport_map)
12830 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12832 in.hash = htab_hash_pointer (decl);
12833 in.base.from = decl;
12834 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12835 h = (struct tree_map *) *loc;
12839 *loc = h = ggc_alloc_tree_map ();
12841 h->base.from = decl;
12842 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12843 VAR_DECL, NULL, ptr_type_node);
12844 DECL_ARTIFICIAL (to) = 1;
12845 DECL_IGNORED_P (to) = 1;
12846 DECL_EXTERNAL (to) = 1;
12847 TREE_READONLY (to) = 1;
12849 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12850 name = targetm.strip_name_encoding (name);
12851 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12852 ? "*__imp_" : "*__imp__";
12853 namelen = strlen (name);
12854 prefixlen = strlen (prefix);
12855 imp_name = (char *) alloca (namelen + prefixlen + 1);
12856 memcpy (imp_name, prefix, prefixlen);
12857 memcpy (imp_name + prefixlen, name, namelen + 1);
12859 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12860 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12861 SET_SYMBOL_REF_DECL (rtl, to);
12862 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12864 rtl = gen_const_mem (Pmode, rtl);
12865 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12867 SET_DECL_RTL (to, rtl);
12868 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result to be a register.  */
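
/* For illustration only (a hedged sketch): a reference to a dllimport
   symbol foo is rewritten into a load from its import-table slot, i.e.
   roughly

       (mem (symbol_ref "*__imp__foo"))

   (or "*__imp_foo", depending on the user label prefix), which the
   dynamic loader fills in with foo's real address.  */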
12877 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12882 gcc_assert (SYMBOL_REF_DECL (symbol));
12883 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12885 x = DECL_RTL (imp_decl);
12887 x = force_reg (Pmode, x);
12891 /* Try machine-dependent ways of modifying an illegitimate address
12892 to be legitimate. If we find one, return the new, valid address.
12893 This macro is used in only one place: `memory_address' in explow.c.
12895 OLDX is the address as it was before break_out_memory_refs was called.
12896 In some cases it is useful to look at this to decide what needs to be done.
12898 It is always safe for this macro to do nothing. It exists to recognize
12899 opportunities to optimize the output.
12901 For the 80386, we handle X+REG by loading X into a register R and
12902 using R+REG. R will go in a general reg and indexing will be used.
12903 However, if REG is a broken-out memory address or multiplication,
12904 nothing needs to be done because REG can certainly go in a general reg.
12906 When -fpic is used, special handling is needed for symbolic references.
12907 See comments by legitimize_pic_address in i386.c for details. */
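
/* For instance (an illustrative sketch): given x = (plus (symbol_ref
   "table") (reg:SI 59)), the symbolic constant is forced into a fresh
   pseudo R, yielding the legitimate base+index address
   (plus (reg R) (reg:SI 59)); similarly (ashift (reg) (const_int 2)) is
   rewritten below into (mult (reg) (const_int 4)) so that it can serve
   as a scaled index.  */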
12910 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12911 enum machine_mode mode)
12916 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
12919 if (GET_CODE (x) == CONST
12920 && GET_CODE (XEXP (x, 0)) == PLUS
12921 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12922 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12924 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12925 (enum tls_model) log, false);
12926 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12929 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12931 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12932 return legitimize_dllimport_symbol (x, true);
12933 if (GET_CODE (x) == CONST
12934 && GET_CODE (XEXP (x, 0)) == PLUS
12935 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12936 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12938 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12939 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12943 if (flag_pic && SYMBOLIC_CONST (x))
12944 return legitimize_pic_address (x, 0);
12947 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12948 return machopic_indirect_data_reference (x, 0);
12951 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12952 if (GET_CODE (x) == ASHIFT
12953 && CONST_INT_P (XEXP (x, 1))
12954 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12957 log = INTVAL (XEXP (x, 1));
12958 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12959 GEN_INT (1 << log));
12962 if (GET_CODE (x) == PLUS)
12964 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12966 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12967 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12968 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12971 log = INTVAL (XEXP (XEXP (x, 0), 1));
12972 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12973 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12974 GEN_INT (1 << log));
12977 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12978 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12979 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12982 log = INTVAL (XEXP (XEXP (x, 1), 1));
12983 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12984 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12985 GEN_INT (1 << log));
12988 /* Put multiply first if it isn't already. */
12989 if (GET_CODE (XEXP (x, 1)) == MULT)
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
12997 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12998 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12999 created by virtual register instantiation, register elimination, and
13000 similar optimizations. */
13001 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13004 x = gen_rtx_PLUS (Pmode,
13005 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13006 XEXP (XEXP (x, 1), 0)),
13007 XEXP (XEXP (x, 1), 1));
13011 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13012 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13013 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13014 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13015 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13016 && CONSTANT_P (XEXP (x, 1)))
13019 rtx other = NULL_RTX;
13021 if (CONST_INT_P (XEXP (x, 1)))
13023 constant = XEXP (x, 1);
13024 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13026 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13028 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13029 other = XEXP (x, 1);
13037 x = gen_rtx_PLUS (Pmode,
13038 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13039 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13040 plus_constant (other, INTVAL (constant)));
      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;
13047 if (GET_CODE (XEXP (x, 0)) == MULT)
13050 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13053 if (GET_CODE (XEXP (x, 1)) == MULT)
13056 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;
13064 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13067 x = legitimize_pic_address (x, 0);
      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;
13073 if (REG_P (XEXP (x, 0)))
13075 rtx temp = gen_reg_rtx (Pmode);
13076 rtx val = force_operand (XEXP (x, 1), temp);
13079 if (GET_MODE (val) != Pmode)
13080 val = convert_to_mode (Pmode, val, 1);
13081 emit_move_insn (temp, val);
13084 XEXP (x, 1) = temp;
13088 else if (REG_P (XEXP (x, 1)))
13090 rtx temp = gen_reg_rtx (Pmode);
13091 rtx val = force_operand (XEXP (x, 0), temp);
13094 if (GET_MODE (val) != Pmode)
13095 val = convert_to_mode (Pmode, val, 1);
13096 emit_move_insn (temp, val);
13099 XEXP (x, 0) = temp;
13107 /* Print an integer constant expression in assembler syntax. Addition
13108 and subtraction are the only arithmetic that may appear in these
13109 expressions. FILE is the stdio stream to write to, X is the rtx, and
13110 CODE is the operand print code from the output string. */
13113 output_pic_addr_const (FILE *file, rtx x, int code)
13117 switch (GET_CODE (x))
13120 gcc_assert (flag_pic);
13125 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13126 output_addr_const (file, x);
13129 const char *name = XSTR (x, 0);
13131 /* Mark the decl as referenced so that cgraph will
13132 output the function. */
13133 if (SYMBOL_REF_DECL (x))
13134 mark_decl_referenced (SYMBOL_REF_DECL (x));
13137 if (MACHOPIC_INDIRECT
13138 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13139 name = machopic_indirection_name (x, /*stub_p=*/true);
13141 assemble_name (file, name);
13143 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13144 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13145 fputs ("@PLT", file);
13152 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13153 assemble_name (asm_out_file, buf);
13157 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13161 /* This used to output parentheses around the expression,
13162 but that does not work on the 386 (either ATT or BSD assembler). */
13163 output_pic_addr_const (file, XEXP (x, 0), code);
13167 if (GET_MODE (x) == VOIDmode)
13169 /* We can use %d if the number is <32 bits and positive. */
13170 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13171 fprintf (file, "0x%lx%08lx",
13172 (unsigned long) CONST_DOUBLE_HIGH (x),
13173 (unsigned long) CONST_DOUBLE_LOW (x));
13175 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13178 /* We can't handle floating point constants;
13179 TARGET_PRINT_OPERAND must handle them. */
13180 output_operand_lossage ("floating constant misused");
13184 /* Some assemblers need integer constants to appear first. */
13185 if (CONST_INT_P (XEXP (x, 0)))
13187 output_pic_addr_const (file, XEXP (x, 0), code);
13189 output_pic_addr_const (file, XEXP (x, 1), code);
13193 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13194 output_pic_addr_const (file, XEXP (x, 1), code);
13196 output_pic_addr_const (file, XEXP (x, 0), code);
13202 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13203 output_pic_addr_const (file, XEXP (x, 0), code);
13205 output_pic_addr_const (file, XEXP (x, 1), code);
13207 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13211 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13213 bool f = i386_asm_output_addr_const_extra (file, x);
13218 gcc_assert (XVECLEN (x, 0) == 1);
13219 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13220 switch (XINT (x, 1))
13223 fputs ("@GOT", file);
13225 case UNSPEC_GOTOFF:
13226 fputs ("@GOTOFF", file);
13228 case UNSPEC_PLTOFF:
13229 fputs ("@PLTOFF", file);
13232 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13233 "(%rip)" : "[rip]", file);
13235 case UNSPEC_GOTPCREL:
13236 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13237 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13239 case UNSPEC_GOTTPOFF:
13240 /* FIXME: This might be @TPOFF in Sun ld too. */
13241 fputs ("@gottpoff", file);
13244 fputs ("@tpoff", file);
13246 case UNSPEC_NTPOFF:
13248 fputs ("@tpoff", file);
13250 fputs ("@ntpoff", file);
13252 case UNSPEC_DTPOFF:
13253 fputs ("@dtpoff", file);
13255 case UNSPEC_GOTNTPOFF:
13257 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13258 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13260 fputs ("@gotntpoff", file);
13262 case UNSPEC_INDNTPOFF:
13263 fputs ("@indntpoff", file);
13266 case UNSPEC_MACHOPIC_OFFSET:
13268 machopic_output_function_base_name (file);
13272 output_operand_lossage ("invalid UNSPEC as operand");
13278 output_operand_lossage ("invalid expression as operand");
13282 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13283 We need to emit DTP-relative relocations. */
13285 static void ATTRIBUTE_UNUSED
13286 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13288 fputs (ASM_LONG, file);
13289 output_addr_const (file, x);
13290 fputs ("@dtpoff", file);
13296 fputs (", 0", file);
13299 gcc_unreachable ();
13303 /* Return true if X is a representation of the PIC register. This copes
13304 with calls from ix86_find_base_term, where the register might have
13305 been replaced by a cselib value. */
13308 ix86_pic_register_p (rtx x)
13310 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13311 return (pic_offset_table_rtx
13312 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13314 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13317 /* Helper function for ix86_delegitimize_address.
13318 Attempt to delegitimize TLS local-exec accesses. */
13321 ix86_delegitimize_tls_address (rtx orig_x)
13323 rtx x = orig_x, unspec;
13324 struct ix86_address addr;
13326 if (!TARGET_TLS_DIRECT_SEG_REFS)
13330 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13332 if (ix86_decompose_address (x, &addr) == 0
13333 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13334 || addr.disp == NULL_RTX
13335 || GET_CODE (addr.disp) != CONST)
13337 unspec = XEXP (addr.disp, 0);
13338 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13339 unspec = XEXP (unspec, 0);
13340 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13342 x = XVECEXP (unspec, 0, 0);
13343 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13344 if (unspec != XEXP (addr.disp, 0))
13345 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13348 rtx idx = addr.index;
13349 if (addr.scale != 1)
13350 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13351 x = gen_rtx_PLUS (Pmode, idx, x);
13354 x = gen_rtx_PLUS (Pmode, addr.base, x);
13355 if (MEM_P (orig_x))
13356 x = replace_equiv_address_nv (orig_x, x);
13360 /* In the name of slightly smaller debug output, and to cater to
13361 general assembler lossage, recognize PIC+GOTOFF and turn it back
13362 into a direct symbol reference.
13364 On Darwin, this is necessary to avoid a crash, because Darwin
13365 has a different PIC label for each routine but the DWARF debugging
13366 information is not associated with any particular routine, so it's
13367 necessary to remove references to the PIC label from RTL stored by
13368 the DWARF output code. */
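
/* For illustration only (a hedged sketch): on 64-bit targets the GOT
   load (mem (const (unspec [FOO] UNSPEC_GOTPCREL))), i.e.
   FOO@GOTPCREL(%rip), delegitimizes back to the plain SYMBOL_REF FOO;
   on 32-bit targets (plus (reg %ebx) (const (unspec [FOO] UNSPEC_GOTOFF)))
   likewise becomes FOO again.  */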
13371 ix86_delegitimize_address (rtx x)
13373 rtx orig_x = delegitimize_mem_from_attrs (x);
13374 /* addend is NULL or some rtx if x is something+GOTOFF where
13375 something doesn't include the PIC register. */
13376 rtx addend = NULL_RTX;
13377 /* reg_addend is NULL or a multiple of some register. */
13378 rtx reg_addend = NULL_RTX;
13379 /* const_addend is NULL or a const_int. */
13380 rtx const_addend = NULL_RTX;
13381 /* This is the result, or NULL. */
13382 rtx result = NULL_RTX;
13391 if (GET_CODE (x) == CONST
13392 && GET_CODE (XEXP (x, 0)) == PLUS
13393 && GET_MODE (XEXP (x, 0)) == Pmode
13394 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13395 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
13396 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
13398 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
13399 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
13400 if (MEM_P (orig_x))
13401 x = replace_equiv_address_nv (orig_x, x);
13404 if (GET_CODE (x) != CONST
13405 || GET_CODE (XEXP (x, 0)) != UNSPEC
13406 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13407 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13408 || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
13409 return ix86_delegitimize_tls_address (orig_x);
13410 x = XVECEXP (XEXP (x, 0), 0, 0);
13411 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13413 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13421 if (GET_CODE (x) != PLUS
13422 || GET_CODE (XEXP (x, 1)) != CONST)
13423 return ix86_delegitimize_tls_address (orig_x);
13425 if (ix86_pic_register_p (XEXP (x, 0)))
13426 /* %ebx + GOT/GOTOFF */
13428 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13430 /* %ebx + %reg * scale + GOT/GOTOFF */
13431 reg_addend = XEXP (x, 0);
13432 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13433 reg_addend = XEXP (reg_addend, 1);
13434 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13435 reg_addend = XEXP (reg_addend, 0);
13438 reg_addend = NULL_RTX;
13439 addend = XEXP (x, 0);
13443 addend = XEXP (x, 0);
13445 x = XEXP (XEXP (x, 1), 0);
13446 if (GET_CODE (x) == PLUS
13447 && CONST_INT_P (XEXP (x, 1)))
13449 const_addend = XEXP (x, 1);
13453 if (GET_CODE (x) == UNSPEC
13454 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13455 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13456 result = XVECEXP (x, 0, 0);
13458 if (TARGET_MACHO && darwin_local_data_pic (x)
13459 && !MEM_P (orig_x))
13460 result = XVECEXP (x, 0, 0);
13463 return ix86_delegitimize_tls_address (orig_x);
13466 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13468 result = gen_rtx_PLUS (Pmode, reg_addend, result);
  /* If the rest of original X doesn't involve the PIC register, add
     addend and subtract pic_offset_table_rtx.  This can happen e.g.
     for code like:
13474 leal (%ebx, %ecx, 4), %ecx
13476 movl foo@GOTOFF(%ecx), %edx
13477 in which case we return (%ecx - %ebx) + foo. */
13478 if (pic_offset_table_rtx)
13479 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13480 pic_offset_table_rtx),
13485 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13487 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13488 if (result == NULL_RTX)
13494 /* If X is a machine specific address (i.e. a symbol or label being
13495 referenced as a displacement from the GOT implemented using an
13496 UNSPEC), then return the base term. Otherwise return X. */
13499 ix86_find_base_term (rtx x)
      if (GET_CODE (x) != CONST)
	return x;
13507 term = XEXP (x, 0);
13508 if (GET_CODE (term) == PLUS
13509 && (CONST_INT_P (XEXP (term, 1))
13510 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13511 term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || (XINT (term, 1) != UNSPEC_GOTPCREL
	      && XINT (term, 1) != UNSPEC_PCREL))
	return x;

      return XVECEXP (term, 0, 0);
13520 return ix86_delegitimize_address (x);
13524 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13525 int fp, FILE *file)
13527 const char *suffix;
13529 if (mode == CCFPmode || mode == CCFPUmode)
13531 code = ix86_fp_compare_code_to_integer (code);
  if (reverse)
    code = reverse_condition (code);
13586 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13590 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13591 Those same assemblers have the same but opposite lossage on cmov. */
13592 if (mode == CCmode)
13593 suffix = fp ? "nbe" : "a";
      else if (mode == CCCmode)
	suffix = "b";
      else
	gcc_unreachable ();
13613 gcc_unreachable ();
13617 gcc_assert (mode == CCmode || mode == CCCmode);
13634 gcc_unreachable ();
13638 /* ??? As above. */
13639 gcc_assert (mode == CCmode || mode == CCCmode);
13640 suffix = fp ? "nb" : "ae";
13643 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13647 /* ??? As above. */
13648 if (mode == CCmode)
13650 else if (mode == CCCmode)
13651 suffix = fp ? "nb" : "ae";
13653 gcc_unreachable ();
13656 suffix = fp ? "u" : "p";
13659 suffix = fp ? "nu" : "np";
13662 gcc_unreachable ();
13664 fputs (suffix, file);
13667 /* Print the name of register X to FILE based on its machine mode and number.
13668 If CODE is 'w', pretend the mode is HImode.
13669 If CODE is 'b', pretend the mode is QImode.
13670 If CODE is 'k', pretend the mode is SImode.
13671 If CODE is 'q', pretend the mode is DImode.
13672 If CODE is 'x', pretend the mode is V4SFmode.
13673 If CODE is 't', pretend the mode is V8SFmode.
13674 If CODE is 'h', pretend the reg is the 'high' byte register.
13675 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13676 If CODE is 'd', duplicate the operand for AVX instruction.
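
   For instance (an illustrative sketch): if X is the hard register
   %eax, CODE 'b' prints "al", 'w' prints "ax", 'k' prints "eax" and
   'q' prints "rax" (the last only meaningful on 64-bit targets), while
   'h' prints the high-byte name "ah".  */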
13680 print_reg (rtx x, int code, FILE *file)
13683 bool duplicated = code == 'd' && TARGET_AVX;
13685 gcc_assert (x == pc_rtx
13686 || (REGNO (x) != ARG_POINTER_REGNUM
13687 && REGNO (x) != FRAME_POINTER_REGNUM
13688 && REGNO (x) != FLAGS_REG
13689 && REGNO (x) != FPSR_REG
13690 && REGNO (x) != FPCR_REG));
13692 if (ASSEMBLER_DIALECT == ASM_ATT)
13697 gcc_assert (TARGET_64BIT);
13698 fputs ("rip", file);
13702 if (code == 'w' || MMX_REG_P (x))
13704 else if (code == 'b')
13706 else if (code == 'k')
13708 else if (code == 'q')
13710 else if (code == 'y')
13712 else if (code == 'h')
13714 else if (code == 'x')
13716 else if (code == 't')
13719 code = GET_MODE_SIZE (GET_MODE (x));
  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers: "r%d[bwd]".  */
13723 if (REX_INT_REG_P (x))
13725 gcc_assert (TARGET_64BIT);
13727 fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
13731 error ("extended registers have no high halves");
13746 error ("unsupported operand size for extended register");
13756 if (STACK_TOP_P (x))
13765 if (! ANY_FP_REG_P (x))
13766 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13771 reg = hi_reg_name[REGNO (x)];
13774 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13776 reg = qi_reg_name[REGNO (x)];
13779 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13781 reg = qi_high_reg_name[REGNO (x)];
13786 gcc_assert (!duplicated);
13788 fputs (hi_reg_name[REGNO (x)] + 1, file);
13793 gcc_unreachable ();
13799 if (ASSEMBLER_DIALECT == ASM_ATT)
13800 fprintf (file, ", %%%s", reg);
13802 fprintf (file, ", %s", reg);
13806 /* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */
13811 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13815 if (GET_CODE (x) == SYMBOL_REF
13816 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13818 cfun->machine->some_ld_name = XSTR (x, 0);
13825 static const char *
13826 get_some_local_dynamic_name (void)
13830 if (cfun->machine->some_ld_name)
13831 return cfun->machine->some_ld_name;
13833 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13834 if (NONDEBUG_INSN_P (insn)
13835 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13836 return cfun->machine->some_ld_name;
13841 /* Meaning of CODE:
13842 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13843 C -- print opcode suffix for set/cmov insn.
13844 c -- like C, but print reversed condition
13845 F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
13848 R -- print the prefix for register names.
13849 z -- print the opcode suffix for the size of the current operand.
13850 Z -- likewise, with special suffixes for x87 instructions.
13851 * -- print a star (in certain assembler syntax)
13852 A -- print an absolute memory reference.
13853 E -- print address with DImode register names if TARGET_64BIT.
13854 w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
13857 b -- print the QImode name of the register for the indicated operand.
13858 %b0 would print %al if operands[0] is reg 0.
13859 w -- likewise, print the HImode name of the register.
13860 k -- likewise, print the SImode name of the register.
13861 q -- likewise, print the DImode name of the register.
13862 x -- likewise, print the V4SFmode name of the register.
13863 t -- likewise, print the V8SFmode name of the register.
13864 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13865 y -- print "st(0)" instead of "st" as a register.
13866 d -- print duplicated register operand for AVX instruction.
13867 D -- print condition for SSE cmp instruction.
13868 P -- if PIC, print an @PLT suffix.
13869 p -- print raw symbol name.
13870 X -- don't print any sort of PIC '@' suffix for a symbol.
13871 & -- print some in-use local-dynamic symbol name.
13872 H -- print a memory address offset by 8; used for sse high-parts
13873 Y -- print condition for XOP pcom* instruction.
13874 + -- print a branch hint as 'cs' or 'ds' prefix
13875 ; -- print a semicolon (after prefixes due to bug in older gas).
13876 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13877 @ -- print a segment register of thread base pointer load
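
   As a hedged illustration (not an exhaustive list): with operands[1]
   the %ecx hard register, the template fragment "%k1" prints "%ecx" and
   "%b1" prints "%cl"; in a conditional branch template such as
   "j%C1\t%l0" the 'C' code expands to the condition suffix, giving
   e.g. "je".  */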
13881 ix86_print_operand (FILE *file, rtx x, int code)
13888 if (ASSEMBLER_DIALECT == ASM_ATT)
13894 const char *name = get_some_local_dynamic_name ();
13896 output_operand_lossage ("'%%&' used without any "
13897 "local dynamic TLS references");
13899 assemble_name (file, name);
13904 switch (ASSEMBLER_DIALECT)
13911 /* Intel syntax. For absolute addresses, registers should not
13912 be surrounded by braces. */
13916 ix86_print_operand (file, x, 0);
13923 gcc_unreachable ();
13926 ix86_print_operand (file, x, 0);
13930 /* Wrap address in an UNSPEC to declare special handling. */
13932 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
13934 output_address (x);
13938 if (ASSEMBLER_DIALECT == ASM_ATT)
13943 if (ASSEMBLER_DIALECT == ASM_ATT)
13948 if (ASSEMBLER_DIALECT == ASM_ATT)
13953 if (ASSEMBLER_DIALECT == ASM_ATT)
13958 if (ASSEMBLER_DIALECT == ASM_ATT)
13963 if (ASSEMBLER_DIALECT == ASM_ATT)
13968 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13970 /* Opcodes don't get size suffixes if using Intel opcodes. */
13971 if (ASSEMBLER_DIALECT == ASM_INTEL)
13974 switch (GET_MODE_SIZE (GET_MODE (x)))
13993 output_operand_lossage
13994 ("invalid operand size for operand code '%c'", code);
13999 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14001 (0, "non-integer operand used with operand code '%c'", code);
14005 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14006 if (ASSEMBLER_DIALECT == ASM_INTEL)
14009 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14011 switch (GET_MODE_SIZE (GET_MODE (x)))
14014 #ifdef HAVE_AS_IX86_FILDS
14024 #ifdef HAVE_AS_IX86_FILDQ
14027 fputs ("ll", file);
14035 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14037 /* 387 opcodes don't get size suffixes
14038 if the operands are registers. */
14039 if (STACK_REG_P (x))
14042 switch (GET_MODE_SIZE (GET_MODE (x)))
14063 output_operand_lossage
14064 ("invalid operand type used with operand code '%c'", code);
14068 output_operand_lossage
14069 ("invalid operand size for operand code '%c'", code);
14087 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14089 ix86_print_operand (file, x, 0);
14090 fputs (", ", file);
	  /* A little bit of braindamage here.  The SSE compare
	     instructions use completely different names for the
	     comparisons than the fp conditional moves.  */
14100 switch (GET_CODE (x))
14103 fputs ("eq", file);
14106 fputs ("eq_us", file);
14109 fputs ("lt", file);
14112 fputs ("nge", file);
14115 fputs ("le", file);
14118 fputs ("ngt", file);
14121 fputs ("unord", file);
14124 fputs ("neq", file);
14127 fputs ("neq_oq", file);
14130 fputs ("ge", file);
14133 fputs ("nlt", file);
14136 fputs ("gt", file);
14139 fputs ("nle", file);
14142 fputs ("ord", file);
14145 output_operand_lossage ("operand is not a condition code, "
14146 "invalid operand code 'D'");
14152 switch (GET_CODE (x))
14156 fputs ("eq", file);
14160 fputs ("lt", file);
14164 fputs ("le", file);
14167 fputs ("unord", file);
14171 fputs ("neq", file);
14175 fputs ("nlt", file);
14179 fputs ("nle", file);
14182 fputs ("ord", file);
14185 output_operand_lossage ("operand is not a condition code, "
14186 "invalid operand code 'D'");
14192 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14193 if (ASSEMBLER_DIALECT == ASM_ATT)
14195 switch (GET_MODE (x))
14197 case HImode: putc ('w', file); break;
14199 case SFmode: putc ('l', file); break;
14201 case DFmode: putc ('q', file); break;
14202 default: gcc_unreachable ();
14209 if (!COMPARISON_P (x))
14211 output_operand_lossage ("operand is neither a constant nor a "
14212 "condition code, invalid operand code "
14216 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14219 if (!COMPARISON_P (x))
14221 output_operand_lossage ("operand is neither a constant nor a "
14222 "condition code, invalid operand code "
14226 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14227 if (ASSEMBLER_DIALECT == ASM_ATT)
14230 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14233 /* Like above, but reverse condition */
14235 /* Check to see if argument to %c is really a constant
14236 and not a condition code which needs to be reversed. */
14237 if (!COMPARISON_P (x))
14239 output_operand_lossage ("operand is neither a constant nor a "
14240 "condition code, invalid operand "
14244 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14247 if (!COMPARISON_P (x))
14249 output_operand_lossage ("operand is neither a constant nor a "
14250 "condition code, invalid operand "
14254 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14255 if (ASSEMBLER_DIALECT == ASM_ATT)
14258 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14262 if (!offsettable_memref_p (x))
14264 output_operand_lossage ("operand is not an offsettable memory "
14265 "reference, invalid operand "
14269 /* It doesn't actually matter what mode we use here, as we're
14270 only going to use this for printing. */
14271 x = adjust_address_nv (x, DImode, 8);
	    if (!optimize
		|| optimize_function_for_size_p (cfun)
		|| !TARGET_BRANCH_PREDICTION_HINTS)
	      return;
14282 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14285 int pred_val = INTVAL (XEXP (x, 0));
14287 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14288 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14290 int taken = pred_val > REG_BR_PROB_BASE / 2;
14291 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14293 /* Emit hints only in the case default branch prediction
14294 heuristics would fail. */
14295 if (taken != cputaken)
14297 /* We use 3e (DS) prefix for taken branches and
14298 2e (CS) prefix for not taken branches. */
14300 fputs ("ds ; ", file);
14302 fputs ("cs ; ", file);
14310 switch (GET_CODE (x))
14313 fputs ("neq", file);
14316 fputs ("eq", file);
14320 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14324 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14328 fputs ("le", file);
14332 fputs ("lt", file);
14335 fputs ("unord", file);
14338 fputs ("ord", file);
14341 fputs ("ueq", file);
14344 fputs ("nlt", file);
14347 fputs ("nle", file);
14350 fputs ("ule", file);
14353 fputs ("ult", file);
14356 fputs ("une", file);
14359 output_operand_lossage ("operand is not a condition code, "
14360 "invalid operand code 'Y'");
14366 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14372 if (ASSEMBLER_DIALECT == ASM_ATT)
14375 /* The kernel uses a different segment register for performance
14376 reasons; a system call would not have to trash the userspace
14377 segment register, which would be expensive. */
14378 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14379 fputs ("fs", file);
14381 fputs ("gs", file);
14385 putc (TARGET_AVX2 ? 'i' : 'f', file);
14389 output_operand_lossage ("invalid operand code '%c'", code);
14394 print_reg (x, code, file);
14396 else if (MEM_P (x))
14398 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14399 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14400 && GET_MODE (x) != BLKmode)
14403 switch (GET_MODE_SIZE (GET_MODE (x)))
14405 case 1: size = "BYTE"; break;
14406 case 2: size = "WORD"; break;
14407 case 4: size = "DWORD"; break;
14408 case 8: size = "QWORD"; break;
14409 case 12: size = "TBYTE"; break;
14411 if (GET_MODE (x) == XFmode)
14416 case 32: size = "YMMWORD"; break;
14418 gcc_unreachable ();
	  /* Check for explicit size override (codes 'b', 'w', 'k',
	     'q' and 'x').  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";
	  else if (code == 'q')
	    size = "QWORD";
	  else if (code == 'x')
	    size = "XMMWORD";
14434 fputs (size, file);
14435 fputs (" PTR ", file);
14439 /* Avoid (%rip) for call operands. */
14440 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14441 && !CONST_INT_P (x))
14442 output_addr_const (file, x);
14443 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14444 output_operand_lossage ("invalid constraints for operand");
14446 output_address (x);
14449 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14454 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14455 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14457 if (ASSEMBLER_DIALECT == ASM_ATT)
14459 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14461 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14463 fprintf (file, "0x%08x", (unsigned int) l);
14466 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14471 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14472 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14474 if (ASSEMBLER_DIALECT == ASM_ATT)
14476 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14479 /* These float cases don't actually occur as immediate operands. */
14480 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14484 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14485 fputs (dstr, file);
14490 /* We have patterns that allow zero sets of memory, for instance.
14491 In 64-bit mode, we should probably support all 8-byte vectors,
14492 since we can in fact encode that into an immediate. */
14493 if (GET_CODE (x) == CONST_VECTOR)
14495 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14499 if (code != 'P' && code != 'p')
14501 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14503 if (ASSEMBLER_DIALECT == ASM_ATT)
14506 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14507 || GET_CODE (x) == LABEL_REF)
14509 if (ASSEMBLER_DIALECT == ASM_ATT)
14512 fputs ("OFFSET FLAT:", file);
14515 if (CONST_INT_P (x))
14516 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14517 else if (flag_pic || MACHOPIC_INDIRECT)
14518 output_pic_addr_const (file, x, code);
14520 output_addr_const (file, x);
14525 ix86_print_operand_punct_valid_p (unsigned char code)
14527 return (code == '@' || code == '*' || code == '+'
14528 || code == '&' || code == ';' || code == '~');
14531 /* Print a memory operand whose address is ADDR. */
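
/* For illustration only (a hedged sketch): the address

       (plus (plus (reg %ebx) (mult (reg %esi) (const_int 4)))
	     (const_int 16))

   prints as "16(%ebx,%esi,4)" in AT&T syntax and as "[ebx+16+esi*4]"
   in Intel syntax.  */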
14534 ix86_print_operand_address (FILE *file, rtx addr)
14536 struct ix86_address parts;
14537 rtx base, index, disp;
14543 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14545 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14546 gcc_assert (parts.index == NULL_RTX);
14547 parts.index = XVECEXP (addr, 0, 1);
14548 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14549 addr = XVECEXP (addr, 0, 0);
14552 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
14554 gcc_assert (TARGET_64BIT);
14555 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14559 ok = ix86_decompose_address (addr, &parts);
14563 if (parts.base && GET_CODE (parts.base) == SUBREG)
14565 rtx tmp = SUBREG_REG (parts.base);
14566 parts.base = simplify_subreg (GET_MODE (parts.base),
14567 tmp, GET_MODE (tmp), 0);
14568 gcc_assert (parts.base != NULL_RTX);
14571 if (parts.index && GET_CODE (parts.index) == SUBREG)
14573 rtx tmp = SUBREG_REG (parts.index);
14574 parts.index = simplify_subreg (GET_MODE (parts.index),
14575 tmp, GET_MODE (tmp), 0);
14576 gcc_assert (parts.index != NULL_RTX);
  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;
14590 if (ASSEMBLER_DIALECT == ASM_ATT)
14592 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14595 gcc_unreachable ();
14598 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14599 if (TARGET_64BIT && !base && !index)
14603 if (GET_CODE (disp) == CONST
14604 && GET_CODE (XEXP (disp, 0)) == PLUS
14605 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14606 symbol = XEXP (XEXP (disp, 0), 0);
14608 if (GET_CODE (symbol) == LABEL_REF
14609 || (GET_CODE (symbol) == SYMBOL_REF
14610 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14613 if (!base && !index)
      /* A displacement-only address requires special attention.  */
14617 if (CONST_INT_P (disp))
14619 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14620 fputs ("ds:", file);
14621 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14624 output_pic_addr_const (file, disp, 0);
14626 output_addr_const (file, disp);
14630 /* Print SImode register names to force addr32 prefix. */
14631 if (GET_CODE (addr) == SUBREG)
14633 gcc_assert (TARGET_64BIT);
14634 gcc_assert (GET_MODE (addr) == SImode);
14635 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
14636 gcc_assert (!code);
14639 else if (GET_CODE (addr) == ZERO_EXTEND
14640 || GET_CODE (addr) == AND)
14642 gcc_assert (TARGET_64BIT);
14643 gcc_assert (GET_MODE (addr) == DImode);
14644 gcc_assert (!code);
14648 if (ASSEMBLER_DIALECT == ASM_ATT)
14653 output_pic_addr_const (file, disp, 0);
14654 else if (GET_CODE (disp) == LABEL_REF)
14655 output_asm_label (disp);
14657 output_addr_const (file, disp);
14662 print_reg (base, code, file);
14666 print_reg (index, vsib ? 0 : code, file);
14667 if (scale != 1 || vsib)
14668 fprintf (file, ",%d", scale);
14674 rtx offset = NULL_RTX;
14678 /* Pull out the offset of a symbol; print any symbol itself. */
14679 if (GET_CODE (disp) == CONST
14680 && GET_CODE (XEXP (disp, 0)) == PLUS
14681 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14683 offset = XEXP (XEXP (disp, 0), 1);
14684 disp = gen_rtx_CONST (VOIDmode,
14685 XEXP (XEXP (disp, 0), 0));
14689 output_pic_addr_const (file, disp, 0);
14690 else if (GET_CODE (disp) == LABEL_REF)
14691 output_asm_label (disp);
14692 else if (CONST_INT_P (disp))
14695 output_addr_const (file, disp);
14701 print_reg (base, code, file);
14704 if (INTVAL (offset) >= 0)
14706 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14710 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14717 print_reg (index, vsib ? 0 : code, file);
14718 if (scale != 1 || vsib)
14719 fprintf (file, "*%d", scale);
14726 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14729 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14733 if (GET_CODE (x) != UNSPEC)
14736 op = XVECEXP (x, 0, 0);
14737 switch (XINT (x, 1))
14739 case UNSPEC_GOTTPOFF:
14740 output_addr_const (file, op);
14741 /* FIXME: This might be @TPOFF in Sun ld. */
14742 fputs ("@gottpoff", file);
14745 output_addr_const (file, op);
14746 fputs ("@tpoff", file);
14748 case UNSPEC_NTPOFF:
14749 output_addr_const (file, op);
14751 fputs ("@tpoff", file);
14753 fputs ("@ntpoff", file);
14755 case UNSPEC_DTPOFF:
14756 output_addr_const (file, op);
14757 fputs ("@dtpoff", file);
14759 case UNSPEC_GOTNTPOFF:
14760 output_addr_const (file, op);
14762 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14763 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14765 fputs ("@gotntpoff", file);
14767 case UNSPEC_INDNTPOFF:
14768 output_addr_const (file, op);
14769 fputs ("@indntpoff", file);
14772 case UNSPEC_MACHOPIC_OFFSET:
14773 output_addr_const (file, op);
14775 machopic_output_function_base_name (file);
14779 case UNSPEC_STACK_CHECK:
14783 gcc_assert (flag_split_stack);
14785 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14786 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
	gcc_unreachable ();
#endif
14791 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14802 /* Split one or more double-mode RTL references into pairs of half-mode
14803 references. The RTL can be REG, offsettable MEM, integer constant, or
14804 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14805 split and "num" is its length. lo_half and hi_half are output arrays
14806 that parallel "operands". */
14809 split_double_mode (enum machine_mode mode, rtx operands[],
14810 int num, rtx lo_half[], rtx hi_half[])
14812 enum machine_mode half_mode;
14818 half_mode = DImode;
14821 half_mode = SImode;
14824 gcc_unreachable ();
14827 byte = GET_MODE_SIZE (half_mode);
14831 rtx op = operands[num];
      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle them.  */
14837 lo_half[num] = adjust_address (op, half_mode, 0);
14838 hi_half[num] = adjust_address (op, half_mode, byte);
14842 lo_half[num] = simplify_gen_subreg (half_mode, op,
14843 GET_MODE (op) == VOIDmode
14844 ? mode : GET_MODE (op), 0);
14845 hi_half[num] = simplify_gen_subreg (half_mode, op,
14846 GET_MODE (op) == VOIDmode
14847 ? mode : GET_MODE (op), byte);
14852 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14853 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14854 is the expression of the binary operation. The output may either be
14855 emitted here, or returned to the caller, like all output_* functions.
14857 There is no guarantee that the operands are the same mode, as they
14858 might be within FLOAT or FLOAT_EXTEND expressions. */
14860 #ifndef SYSV386_COMPAT
14861 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14862 wants to fix the assemblers because that causes incompatibility
14863 with gcc. No-one wants to fix gcc because that causes
14864 incompatibility with assemblers... You can use the option of
14865 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14866 #define SYSV386_COMPAT 1
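
/* For illustration only (a hedged sketch): for operands[3] = (plus ...)
   with both inputs in 387 registers this function can return a template
   such as "fadd\t{%2, %0|%0, %2}", while the SSE path builds e.g.
   "vaddsd\t{%2, %1, %0|%0, %1, %2}".  SYSV386_COMPAT only affects the
   reversed/popping forms of fsub and fdiv, where AT&T-derived assemblers
   swap the meaning of fsub and fsubr when the destination is not
   %st(0).  */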
14870 output_387_binary_op (rtx insn, rtx *operands)
14872 static char buf[40];
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1])
	       || SSE_REG_P (operands[2]);
#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents the input
     constraints, which helps in understanding the following code.  */
14880 if (STACK_REG_P (operands[0])
14881 && ((REG_P (operands[1])
14882 && REGNO (operands[0]) == REGNO (operands[1])
14883 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14884 || (REG_P (operands[2])
14885 && REGNO (operands[0]) == REGNO (operands[2])
14886 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14887 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ;				/* ok */
  else
    gcc_assert (is_sse);
#endif
14893 switch (GET_CODE (operands[3]))
14896 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14897 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14905 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14906 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14914 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14915 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14923 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14924 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14932 gcc_unreachable ();
14939 strcpy (buf, ssep);
14940 if (GET_MODE (operands[0]) == SFmode)
14941 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14943 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14947 strcpy (buf, ssep + 1);
14948 if (GET_MODE (operands[0]) == SFmode)
14949 strcat (buf, "ss\t{%2, %0|%0, %2}");
14951 strcat (buf, "sd\t{%2, %0|%0, %2}");
14957 switch (GET_CODE (operands[3]))
14961 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14963 rtx temp = operands[2];
14964 operands[2] = operands[1];
14965 operands[1] = temp;
      /* We know operands[0] == operands[1].  */
14970 if (MEM_P (operands[2]))
14976 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14978 if (STACK_TOP_P (operands[0]))
14979 /* How is it that we are storing to a dead operand[2]?
14980 Well, presumably operands[1] is dead too. We can't
14981 store the result to st(0) as st(0) gets popped on this
14982 instruction. Instead store to operands[2] (which I
14983 think has to be st(1)). st(1) will be popped later.
14984 gcc <= 2.8.1 didn't have this check and generated
14985 assembly code that the Unixware assembler rejected. */
14986 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14988 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14992 if (STACK_TOP_P (operands[0]))
14993 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14995 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15000 if (MEM_P (operands[1]))
15006 if (MEM_P (operands[2]))
15012 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15015 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15016 derived assemblers, confusingly reverse the direction of
15017 the operation for fsub{r} and fdiv{r} when the
15018 destination register is not st(0). The Intel assembler
15019 doesn't have this brain damage. Read !SYSV386_COMPAT to
15020 figure out what the hardware really does. */
15021 if (STACK_TOP_P (operands[0]))
15022 p = "{p\t%0, %2|rp\t%2, %0}";
15024 p = "{rp\t%2, %0|p\t%0, %2}";
15026 if (STACK_TOP_P (operands[0]))
15027 /* As above for fmul/fadd, we can't store to st(0). */
15028 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15030 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15035 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15038 if (STACK_TOP_P (operands[0]))
15039 p = "{rp\t%0, %1|p\t%1, %0}";
15041 p = "{p\t%1, %0|rp\t%0, %1}";
15043 if (STACK_TOP_P (operands[0]))
15044 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15046 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15051 if (STACK_TOP_P (operands[0]))
15053 if (STACK_TOP_P (operands[1]))
15054 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15056 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15059 else if (STACK_TOP_P (operands[1]))
15062 p = "{\t%1, %0|r\t%0, %1}";
15064 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15070 p = "{r\t%2, %0|\t%0, %2}";
15072 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15078 gcc_unreachable ();
/* Return the needed mode for ENTITY in the optimize_mode_switching pass.  */
15088 ix86_mode_needed (int entity, rtx insn)
15090 enum attr_i387_cw mode;
  /* The mode UNINITIALIZED is used to store the control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */
15098 || (NONJUMP_INSN_P (insn)
15099 && (asm_noperands (PATTERN (insn)) >= 0
15100 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15101 return I387_CW_UNINITIALIZED;
15103 if (recog_memoized (insn) < 0)
15104 return I387_CW_ANY;
15106 mode = get_attr_i387_cw (insn);
15111 if (mode == I387_CW_TRUNC)
15116 if (mode == I387_CW_FLOOR)
15121 if (mode == I387_CW_CEIL)
15126 if (mode == I387_CW_MASK_PM)
15131 gcc_unreachable ();
15134 return I387_CW_ANY;
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to the current control word,
   while NEW_MODE is set to the new control word.  */
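
/* As a reading aid, the relevant x87 control word layout (standard
   architectural facts, summarized here): the rounding-control field is
   bits 11:10 and the precision-exception mask is bit 5:

     RC = 00  round to nearest even   (hardware default)
     RC = 01  round down toward -inf  -> 0x0400  (I387_CW_FLOOR)
     RC = 10  round up toward +inf    -> 0x0800  (I387_CW_CEIL)
     RC = 11  round toward zero       -> 0x0c00  (I387_CW_TRUNC)
     PM = 1   mask precision trap     -> 0x0020  (I387_CW_MASK_PM)

   FLOOR and CEIL below therefore first clear both RC bits (the and
   with ~0x0c00) and then or in the new value; TRUNC sets both bits,
   so a plain ior suffices.  */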
15142 emit_i387_cw_initialization (int mode)
15144 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15147 enum ix86_stack_slot slot;
15149 rtx reg = gen_reg_rtx (HImode);
15151 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15152 emit_move_insn (reg, copy_rtx (stored_mode));
15154 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15155 || optimize_function_for_size_p (cfun))
15159 case I387_CW_TRUNC:
15160 /* round toward zero (truncate) */
15161 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15162 slot = SLOT_CW_TRUNC;
15165 case I387_CW_FLOOR:
15166 /* round down toward -oo */
15167 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15168 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15169 slot = SLOT_CW_FLOOR;
15173 /* round up toward +oo */
15174 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15175 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15176 slot = SLOT_CW_CEIL;
15179 case I387_CW_MASK_PM:
15180 /* mask precision exception for nearbyint() */
15181 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15182 slot = SLOT_CW_MASK_PM;
15186 gcc_unreachable ();
15193 case I387_CW_TRUNC:
15194 /* round toward zero (truncate) */
15195 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15196 slot = SLOT_CW_TRUNC;
15199 case I387_CW_FLOOR:
15200 /* round down toward -oo */
15201 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15202 slot = SLOT_CW_FLOOR;
15206 /* round up toward +oo */
15207 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15208 slot = SLOT_CW_CEIL;
15211 case I387_CW_MASK_PM:
15212 /* mask precision exception for nearbyint() */
15213 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15214 slot = SLOT_CW_MASK_PM;
15218 gcc_unreachable ();
15222 gcc_assert (slot < MAX_386_STACK_LOCALS);
15224 new_mode = assign_386_stack_local (HImode, slot);
15225 emit_move_insn (new_mode, reg);
15228 /* Output code for INSN to convert a float to a signed int. OPERANDS
15229 are the insn operands. The output may be [HSD]Imode and the input
15230 operand may be [SDX]Fmode. */
15233 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
15235 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15236 int dimode_p = GET_MODE (operands[0]) == DImode;
15237 int round_mode = get_attr_i387_cw (insn);
15239 /* Jump through a hoop or two for DImode, since the hardware has no
15240 non-popping instruction. We used to do this a different way, but
15241 that was somewhat fragile and broke with post-reload splitters. */
15242 if ((dimode_p || fisttp) && !stack_top_dies)
15243 output_asm_insn ("fld\t%y1", operands);
15245 gcc_assert (STACK_TOP_P (operands[1]));
15246 gcc_assert (MEM_P (operands[0]));
15247 gcc_assert (GET_MODE (operands[1]) != TFmode);
15250 output_asm_insn ("fisttp%Z0\t%0", operands);
15253 if (round_mode != I387_CW_ANY)
15254 output_asm_insn ("fldcw\t%3", operands);
15255 if (stack_top_dies || dimode_p)
15256 output_asm_insn ("fistp%Z0\t%0", operands);
15258 output_asm_insn ("fist%Z0\t%0", operands);
15259 if (round_mode != I387_CW_ANY)
15260 output_asm_insn ("fldcw\t%2", operands);
15266 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15267 have the values zero or one, indicates the ffreep insn's operand
15268 from the OPERANDS array. */
15270 static const char *
15271 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15273 if (TARGET_USE_FFREEP)
15274 #ifdef HAVE_AS_IX86_FFREEP
15275 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15278 static char retval[32];
15279 int regno = REGNO (operands[opno]);
15281 gcc_assert (FP_REGNO_P (regno));
15283 regno -= FIRST_STACK_REG;
15285 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
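
      /* An illustrative check of the encoding (this relies on ASM_SHORT
	 data being emitted little-endian, which is what makes the trick
	 work): ffreep %st(N) is the byte pair 0xdf 0xc0+N, so for
	 N == 1 the string built above is ASM_SHORT "0xc1df", whose
	 bytes 0xdf 0xc1 are exactly ffreep %st(1), spelled without
	 requiring assembler support for the mnemonic.  */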
15290 return opno ? "fstp\t%y1" : "fstp\t%y0";
15294 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15295 should be used. UNORDERED_P is true when fucom should be used. */
15298 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
15300 int stack_top_dies;
15301 rtx cmp_op0, cmp_op1;
15302 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15306 cmp_op0 = operands[0];
15307 cmp_op1 = operands[1];
15311 cmp_op0 = operands[1];
15312 cmp_op1 = operands[2];
15317 if (GET_MODE (operands[0]) == SFmode)
15319 return "%vucomiss\t{%1, %0|%0, %1}";
15321 return "%vcomiss\t{%1, %0|%0, %1}";
15324 return "%vucomisd\t{%1, %0|%0, %1}";
15326 return "%vcomisd\t{%1, %0|%0, %1}";
15329 gcc_assert (STACK_TOP_P (cmp_op0));
15331 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15333 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15335 if (stack_top_dies)
15337 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15338 return output_387_ffreep (operands, 1);
15341 return "ftst\n\tfnstsw\t%0";
15344 if (STACK_REG_P (cmp_op1)
15346 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15347 && REGNO (cmp_op1) != FIRST_STACK_REG)
      /* If the top of the 387 stack dies, and the other operand is also
	 a stack register that dies, then this must be a `fcompp' float
	 compare.  */
15355 /* There is no double popping fcomi variant. Fortunately,
15356 eflags is immune from the fstp's cc clobbering. */
15358 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15360 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15361 return output_387_ffreep (operands, 0);
15366 return "fucompp\n\tfnstsw\t%0";
15368 return "fcompp\n\tfnstsw\t%0";
15373 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15375 static const char * const alt[16] =
15377 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15378 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15379 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15380 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15382 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15383 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15387 "fcomi\t{%y1, %0|%0, %y1}",
15388 "fcomip\t{%y1, %0|%0, %y1}",
15389 "fucomi\t{%y1, %0|%0, %y1}",
15390 "fucomip\t{%y1, %0|%0, %y1}",
15401 mask = eflags_p << 3;
15402 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15403 mask |= unordered_p << 1;
15404 mask |= stack_top_dies;
15406 gcc_assert (mask < 16);
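
  /* Worked example of the encoding: an unordered compare that targets
     eflags, with a non-integer operand and a dying stack top, has
     eflags_p = 1, unordered_p = 1 and stack_top_dies = 1, so
     mask = 8 + 2 + 1 = 11 selects "fucomip\t{%y1, %0|%0, %y1}" from
     the table above.  */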
15415 ix86_output_addr_vec_elt (FILE *file, int value)
15417 const char *directive = ASM_LONG;
15421 directive = ASM_QUAD;
15423 gcc_assert (!TARGET_64BIT);
15426 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15430 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15432 const char *directive = ASM_LONG;
15435 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15436 directive = ASM_QUAD;
15438 gcc_assert (!TARGET_64BIT);
15440 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15441 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15442 fprintf (file, "%s%s%d-%s%d\n",
15443 directive, LPREFIX, value, LPREFIX, rel);
15444 else if (HAVE_AS_GOTOFF_IN_DATA)
15445 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15447 else if (TARGET_MACHO)
15449 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15450 machopic_output_function_base_name (file);
15455 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15456 GOT_SYMBOL_NAME, LPREFIX, value);
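
/* Typical ELF/-fpic output of the above, with assumed label numbers:
   the HAVE_AS_GOTOFF_IN_DATA form emits e.g. ".long .L5@GOTOFF", and
   the fallback emits ".long _GLOBAL_OFFSET_TABLE_+[.-.L5]"; either
   way the jump table stays position-independent.  */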
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */
15463 ix86_expand_clear (rtx dest)
15467 /* We play register width games, which are only valid after reload. */
15468 gcc_assert (reload_completed);
15470 /* Avoid HImode and its attendant prefix byte. */
15471 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15472 dest = gen_rtx_REG (SImode, REGNO (dest));
15473 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15475 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
  if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
15478 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15479 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
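
      /* Rationale, for the record: "xor %eax, %eax" is 2 bytes and
	 breaks any dependence on the old register value, whereas
	 "mov $0, %eax" is 5 bytes but leaves the flags untouched;
	 the flags clobber is therefore attached only when the xor
	 form will be used.  */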
15485 /* X is an unchanging MEM. If it is a constant pool reference, return
15486 the constant pool rtx, else NULL. */
15489 maybe_get_pool_constant (rtx x)
15491 x = ix86_delegitimize_address (XEXP (x, 0));
15493 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15494 return get_pool_constant (x);
15500 ix86_expand_move (enum machine_mode mode, rtx operands[])
15503 enum tls_model model;
15508 if (GET_CODE (op1) == SYMBOL_REF)
15510 model = SYMBOL_REF_TLS_MODEL (op1);
15513 op1 = legitimize_tls_address (op1, model, true);
15514 op1 = force_operand (op1, op0);
15517 if (GET_MODE (op1) != mode)
15518 op1 = convert_to_mode (mode, op1, 1);
15520 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15521 && SYMBOL_REF_DLLIMPORT_P (op1))
15522 op1 = legitimize_dllimport_symbol (op1, false);
15524 else if (GET_CODE (op1) == CONST
15525 && GET_CODE (XEXP (op1, 0)) == PLUS
15526 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15528 rtx addend = XEXP (XEXP (op1, 0), 1);
15529 rtx symbol = XEXP (XEXP (op1, 0), 0);
15532 model = SYMBOL_REF_TLS_MODEL (symbol);
15534 tmp = legitimize_tls_address (symbol, model, true);
15535 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15536 && SYMBOL_REF_DLLIMPORT_P (symbol))
15537 tmp = legitimize_dllimport_symbol (symbol, true);
15541 tmp = force_operand (tmp, NULL);
15542 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15543 op0, 1, OPTAB_DIRECT);
15546 if (GET_MODE (tmp) != mode)
15547 op1 = convert_to_mode (mode, tmp, 1);
15551 if ((flag_pic || MACHOPIC_INDIRECT)
15552 && symbolic_operand (op1, mode))
15554 if (TARGET_MACHO && !TARGET_64BIT)
15557 /* dynamic-no-pic */
15558 if (MACHOPIC_INDIRECT)
15560 rtx temp = ((reload_in_progress
15561 || ((op0 && REG_P (op0))
15563 ? op0 : gen_reg_rtx (Pmode));
15564 op1 = machopic_indirect_data_reference (op1, temp);
15566 op1 = machopic_legitimize_pic_address (op1, mode,
15567 temp == op1 ? 0 : temp);
15569 if (op0 != op1 && GET_CODE (op0) != MEM)
15571 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15575 if (GET_CODE (op0) == MEM)
15576 op1 = force_reg (Pmode, op1);
15580 if (GET_CODE (temp) != REG)
15581 temp = gen_reg_rtx (Pmode);
15582 temp = legitimize_pic_address (op1, temp);
15587 /* dynamic-no-pic */
15593 op1 = force_reg (mode, op1);
15594 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15596 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15597 op1 = legitimize_pic_address (op1, reg);
15600 if (GET_MODE (op1) != mode)
15601 op1 = convert_to_mode (mode, op1, 1);
15608 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15609 || !push_operand (op0, mode))
15611 op1 = force_reg (mode, op1);
15613 if (push_operand (op0, mode)
15614 && ! general_no_elim_operand (op1, mode))
15615 op1 = copy_to_mode_reg (mode, op1);
  /* Force large constants in 64bit compilation into a register
     to get them CSEed.  */
15619 if (can_create_pseudo_p ()
15620 && (mode == DImode) && TARGET_64BIT
15621 && immediate_operand (op1, mode)
15622 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15623 && !register_operand (op0, mode)
15625 op1 = copy_to_mode_reg (mode, op1);
15627 if (can_create_pseudo_p ()
15628 && FLOAT_MODE_P (mode)
15629 && GET_CODE (op1) == CONST_DOUBLE)
15631 /* If we are loading a floating point constant to a register,
15632 force the value to memory now, since we'll get better code
15633 out the back end. */
15635 op1 = validize_mem (force_const_mem (mode, op1));
15636 if (!register_operand (op0, mode))
15638 rtx temp = gen_reg_rtx (mode);
15639 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15640 emit_move_insn (op0, temp);
15646 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15650 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15652 rtx op0 = operands[0], op1 = operands[1];
15653 unsigned int align = GET_MODE_ALIGNMENT (mode);
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register; once we have that information we may be able
     to handle some of them more efficiently.  */
15659 if (can_create_pseudo_p ()
15660 && register_operand (op0, mode)
15661 && (CONSTANT_P (op1)
15662 || (GET_CODE (op1) == SUBREG
15663 && CONSTANT_P (SUBREG_REG (op1))))
15664 && !standard_sse_constant_p (op1))
15665 op1 = validize_mem (force_const_mem (mode, op1));
  /* We need to check memory alignment for SSE mode since the attribute
     can make operands unaligned.  */
15669 if (can_create_pseudo_p ()
15670 && SSE_REG_MODE_P (mode)
15671 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15672 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15676 /* ix86_expand_vector_move_misalign() does not like constants ... */
15677 if (CONSTANT_P (op1)
15678 || (GET_CODE (op1) == SUBREG
15679 && CONSTANT_P (SUBREG_REG (op1))))
15680 op1 = validize_mem (force_const_mem (mode, op1));
15682 /* ... nor both arguments in memory. */
15683 if (!register_operand (op0, mode)
15684 && !register_operand (op1, mode))
15685 op1 = force_reg (mode, op1);
15687 tmp[0] = op0; tmp[1] = op1;
15688 ix86_expand_vector_move_misalign (mode, tmp);
15692 /* Make operand1 a register if it isn't already. */
15693 if (can_create_pseudo_p ()
15694 && !register_operand (op0, mode)
15695 && !register_operand (op1, mode))
15697 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15701 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15704 /* Split 32-byte AVX unaligned load and store if needed. */
15707 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15710 rtx (*extract) (rtx, rtx, rtx);
15711 rtx (*load_unaligned) (rtx, rtx);
15712 rtx (*store_unaligned) (rtx, rtx);
15713 enum machine_mode mode;
15715 switch (GET_MODE (op0))
15718 gcc_unreachable ();
15720 extract = gen_avx_vextractf128v32qi;
15721 load_unaligned = gen_avx_loaddqu256;
15722 store_unaligned = gen_avx_storedqu256;
15726 extract = gen_avx_vextractf128v8sf;
15727 load_unaligned = gen_avx_loadups256;
15728 store_unaligned = gen_avx_storeups256;
15732 extract = gen_avx_vextractf128v4df;
15733 load_unaligned = gen_avx_loadupd256;
15734 store_unaligned = gen_avx_storeupd256;
15741 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15743 rtx r = gen_reg_rtx (mode);
15744 m = adjust_address (op1, mode, 0);
15745 emit_move_insn (r, m);
15746 m = adjust_address (op1, mode, 16);
15747 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15748 emit_move_insn (op0, r);
15751 emit_insn (load_unaligned (op0, op1));
15753 else if (MEM_P (op0))
15755 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15757 m = adjust_address (op0, mode, 0);
15758 emit_insn (extract (m, op1, const0_rtx));
15759 m = adjust_address (op0, mode, 16);
15760 emit_insn (extract (m, op1, const1_rtx));
15763 emit_insn (store_unaligned (op0, op1));
15766 gcc_unreachable ();
15769 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15770 straight to ix86_expand_vector_move. */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg;   else movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg;   else movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg    (gas syntax);   else movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg
     else if (x86_sse_partial_reg_dependency == true)
       xorps reg, reg; movlps mem, reg; movhps mem+8, reg
     else
       movlps mem, reg; movhps mem+8, reg

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg
     else if (x86_sse_split_regs == true)
       movlpd mem, reg; movhpd mem+8, reg
     else
       movsd mem, reg; movhpd mem+8, reg  */
15822 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15825 rtx (*move_unaligned) (rtx, rtx);
15832 switch (GET_MODE_CLASS (mode))
15834 case MODE_VECTOR_INT:
15836 switch (GET_MODE_SIZE (mode))
15839 /* If we're optimizing for size, movups is the smallest. */
15840 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15843 move_unaligned = gen_sse_loadups;
15844 else if (MEM_P (op0))
15845 move_unaligned = gen_sse_storeups;
15847 gcc_unreachable ();
15849 op0 = gen_lowpart (V4SFmode, op0);
15850 op1 = gen_lowpart (V4SFmode, op1);
15851 emit_insn (move_unaligned (op0, op1));
15855 move_unaligned = gen_sse2_loaddqu;
15856 else if (MEM_P (op0))
15857 move_unaligned = gen_sse2_storedqu;
15859 gcc_unreachable ();
15861 op0 = gen_lowpart (V16QImode, op0);
15862 op1 = gen_lowpart (V16QImode, op1);
15863 emit_insn (move_unaligned (op0, op1));
15866 op0 = gen_lowpart (V32QImode, op0);
15867 op1 = gen_lowpart (V32QImode, op1);
15868 ix86_avx256_split_vector_move_misalign (op0, op1);
15871 gcc_unreachable ();
15874 case MODE_VECTOR_FLOAT:
15875 op0 = gen_lowpart (mode, op0);
15876 op1 = gen_lowpart (mode, op1);
15882 move_unaligned = gen_sse_loadups;
15883 else if (MEM_P (op0))
15884 move_unaligned = gen_sse_storeups;
15886 gcc_unreachable ();
15888 emit_insn (move_unaligned (op0, op1));
15891 ix86_avx256_split_vector_move_misalign (op0, op1);
15894 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15897 move_unaligned = gen_sse_loadups;
15898 else if (MEM_P (op0))
15899 move_unaligned = gen_sse_storeups;
15901 gcc_unreachable ();
15903 op0 = gen_lowpart (V4SFmode, op0);
15904 op1 = gen_lowpart (V4SFmode, op1);
15905 emit_insn (move_unaligned (op0, op1));
15909 move_unaligned = gen_sse2_loadupd;
15910 else if (MEM_P (op0))
15911 move_unaligned = gen_sse2_storeupd;
15913 gcc_unreachable ();
15915 emit_insn (move_unaligned (op0, op1));
15918 ix86_avx256_split_vector_move_misalign (op0, op1);
15921 gcc_unreachable ();
15926 gcc_unreachable ();
15934 /* If we're optimizing for size, movups is the smallest. */
15935 if (optimize_insn_for_size_p ()
15936 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15938 op0 = gen_lowpart (V4SFmode, op0);
15939 op1 = gen_lowpart (V4SFmode, op1);
15940 emit_insn (gen_sse_loadups (op0, op1));
15944 /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type instructions.  */
15947 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15949 op0 = gen_lowpart (V16QImode, op0);
15950 op1 = gen_lowpart (V16QImode, op1);
15951 emit_insn (gen_sse2_loaddqu (op0, op1));
15955 if (TARGET_SSE2 && mode == V2DFmode)
15959 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15961 op0 = gen_lowpart (V2DFmode, op0);
15962 op1 = gen_lowpart (V2DFmode, op1);
15963 emit_insn (gen_sse2_loadupd (op0, op1));
15967 /* When SSE registers are split into halves, we can avoid
15968 writing to the top half twice. */
15969 if (TARGET_SSE_SPLIT_REGS)
15971 emit_clobber (op0);
15976 /* ??? Not sure about the best option for the Intel chips.
15977 The following would seem to satisfy; the register is
15978 entirely cleared, breaking the dependency chain. We
15979 then store to the upper half, with a dependency depth
15980 of one. A rumor has it that Intel recommends two movsd
15981 followed by an unpacklpd, but this is unconfirmed. And
15982 given that the dependency depth of the unpacklpd would
15983 still be one, I'm not sure why this would be better. */
15984 zero = CONST0_RTX (V2DFmode);
15987 m = adjust_address (op1, DFmode, 0);
15988 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15989 m = adjust_address (op1, DFmode, 8);
15990 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15994 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15996 op0 = gen_lowpart (V4SFmode, op0);
15997 op1 = gen_lowpart (V4SFmode, op1);
15998 emit_insn (gen_sse_loadups (op0, op1));
16002 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
16003 emit_move_insn (op0, CONST0_RTX (mode));
16005 emit_clobber (op0);
16007 if (mode != V4SFmode)
16008 op0 = gen_lowpart (V4SFmode, op0);
16009 m = adjust_address (op1, V2SFmode, 0);
16010 emit_insn (gen_sse_loadlps (op0, op0, m));
16011 m = adjust_address (op1, V2SFmode, 8);
16012 emit_insn (gen_sse_loadhps (op0, op0, m));
16015 else if (MEM_P (op0))
16017 /* If we're optimizing for size, movups is the smallest. */
16018 if (optimize_insn_for_size_p ()
16019 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
16021 op0 = gen_lowpart (V4SFmode, op0);
16022 op1 = gen_lowpart (V4SFmode, op1);
16023 emit_insn (gen_sse_storeups (op0, op1));
16027 /* ??? Similar to above, only less clear because of quote
16028 typeless stores unquote. */
16029 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
16030 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16032 op0 = gen_lowpart (V16QImode, op0);
16033 op1 = gen_lowpart (V16QImode, op1);
16034 emit_insn (gen_sse2_storedqu (op0, op1));
16038 if (TARGET_SSE2 && mode == V2DFmode)
16040 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
16042 op0 = gen_lowpart (V2DFmode, op0);
16043 op1 = gen_lowpart (V2DFmode, op1);
16044 emit_insn (gen_sse2_storeupd (op0, op1));
16048 m = adjust_address (op0, DFmode, 0);
16049 emit_insn (gen_sse2_storelpd (m, op1));
16050 m = adjust_address (op0, DFmode, 8);
16051 emit_insn (gen_sse2_storehpd (m, op1));
16056 if (mode != V4SFmode)
16057 op1 = gen_lowpart (V4SFmode, op1);
16059 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
16061 op0 = gen_lowpart (V4SFmode, op0);
16062 emit_insn (gen_sse_storeups (op0, op1));
16066 m = adjust_address (op0, V2SFmode, 0);
16067 emit_insn (gen_sse_storelps (m, op1));
16068 m = adjust_address (op0, V2SFmode, 8);
16069 emit_insn (gen_sse_storehps (m, op1));
16074 gcc_unreachable ();
16077 /* Expand a push in MODE. This is some mode for which we do not support
16078 proper push instructions, at least from the registers that we expect
16079 the value to live in. */
16082 ix86_expand_push (enum machine_mode mode, rtx x)
16086 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
16087 GEN_INT (-GET_MODE_SIZE (mode)),
16088 stack_pointer_rtx, 1, OPTAB_DIRECT);
16089 if (tmp != stack_pointer_rtx)
16090 emit_move_insn (stack_pointer_rtx, tmp);
16092 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
16094 /* When we push an operand onto stack, it has to be aligned at least
16095 at the function argument boundary. However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
16098 emit_move_insn (tmp, x);
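
/* For illustration (an assumed example, not a real dump): pushing a
   DFmode value on 32-bit expands to roughly

     subl  $8, %esp
     movl  lo, (%esp)
     movl  hi, 4(%esp)

   i.e. an explicit stack-pointer adjustment followed by ordinary
   stores, rather than a real push instruction.  */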
16101 /* Helper function of ix86_fixup_binary_operands to canonicalize
16102 operand order. Returns true if the operands should be swapped. */
16105 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
16108 rtx dst = operands[0];
16109 rtx src1 = operands[1];
16110 rtx src2 = operands[2];
16112 /* If the operation is not commutative, we can't do anything. */
16113 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
16116 /* Highest priority is that src1 should match dst. */
16117 if (rtx_equal_p (dst, src1))
16119 if (rtx_equal_p (dst, src2))
16122 /* Next highest priority is that immediate constants come second. */
16123 if (immediate_operand (src2, mode))
16125 if (immediate_operand (src1, mode))
16128 /* Lowest priority is that memory references should come second. */
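
  /* Worked example: for PLUS with dst = r1, src1 = const_int 5 and
     src2 = r1, the highest-priority rule fires on src2 (it matches
     dst), the operands are swapped to src1 = r1, src2 = 5, and the
     insn can then be emitted as "addl $5, %r1".  */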
16138 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16139 destination to use for the operation. If different from the true
16140 destination in operands[0], a copy operation will be required. */
16143 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
16146 rtx dst = operands[0];
16147 rtx src1 = operands[1];
16148 rtx src2 = operands[2];
16150 /* Canonicalize operand order. */
16151 if (ix86_swap_binary_operands_p (code, mode, operands))
16155 /* It is invalid to swap operands of different modes. */
16156 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
16163 /* Both source operands cannot be in memory. */
16164 if (MEM_P (src1) && MEM_P (src2))
16166 /* Optimization: Only read from memory once. */
16167 if (rtx_equal_p (src1, src2))
16169 src2 = force_reg (mode, src2);
16173 src2 = force_reg (mode, src2);
16176 /* If the destination is memory, and we do not have matching source
16177 operands, do things in registers. */
16178 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16179 dst = gen_reg_rtx (mode);
16181 /* Source 1 cannot be a constant. */
16182 if (CONSTANT_P (src1))
16183 src1 = force_reg (mode, src1);
16185 /* Source 1 cannot be a non-matching memory. */
16186 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16187 src1 = force_reg (mode, src1);
16189 /* Improve address combine. */
16191 && GET_MODE_CLASS (mode) == MODE_INT
16193 src2 = force_reg (mode, src2);
16195 operands[1] = src1;
16196 operands[2] = src2;
16200 /* Similarly, but assume that the destination has already been
16201 set up properly. */
16204 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
16205 enum machine_mode mode, rtx operands[])
16207 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
16208 gcc_assert (dst == operands[0]);
16211 /* Attempt to expand a binary operator. Make the expansion closer to the
   actual machine than just general_operand, which will allow 3 separate
16213 memory references (one output, two input) in a single insn. */
16216 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
16219 rtx src1, src2, dst, op, clob;
16221 dst = ix86_fixup_binary_operands (code, mode, operands);
16222 src1 = operands[1];
16223 src2 = operands[2];
16225 /* Emit the instruction. */
16227 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16228 if (reload_in_progress)
16230 /* Reload doesn't know about the flags register, and doesn't know that
16231 it doesn't want to clobber it. We can only do this with PLUS. */
16232 gcc_assert (code == PLUS);
16235 else if (reload_completed
16237 && !rtx_equal_p (dst, src1))
16239 /* This is going to be an LEA; avoid splitting it later. */
16244 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16245 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16248 /* Fix up the destination if needed. */
16249 if (dst != operands[0])
16250 emit_move_insn (operands[0], dst);
16253 /* Return TRUE or FALSE depending on whether the binary operator meets the
16254 appropriate constraints. */
16257 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16260 rtx dst = operands[0];
16261 rtx src1 = operands[1];
16262 rtx src2 = operands[2];
16264 /* Both source operands cannot be in memory. */
16265 if (MEM_P (src1) && MEM_P (src2))
16268 /* Canonicalize operand order for commutative operators. */
16269 if (ix86_swap_binary_operands_p (code, mode, operands))
16276 /* If the destination is memory, we must have a matching source operand. */
16277 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16280 /* Source 1 cannot be a constant. */
16281 if (CONSTANT_P (src1))
16284 /* Source 1 cannot be a non-matching memory. */
16285 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16286 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16287 return (code == AND
16290 || (TARGET_64BIT && mode == DImode))
16291 && satisfies_constraint_L (src2));
16296 /* Attempt to expand a unary operator. Make the expansion closer to the
   actual machine than just general_operand, which will allow 2 separate
16298 memory references (one output, one input) in a single insn. */
16301 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16304 int matching_memory;
16305 rtx src, dst, op, clob;
16310 /* If the destination is memory, and we do not have matching source
16311 operands, do things in registers. */
16312 matching_memory = 0;
16315 if (rtx_equal_p (dst, src))
16316 matching_memory = 1;
16318 dst = gen_reg_rtx (mode);
16321 /* When source operand is memory, destination must match. */
16322 if (MEM_P (src) && !matching_memory)
16323 src = force_reg (mode, src);
16325 /* Emit the instruction. */
16327 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16328 if (reload_in_progress || code == NOT)
16330 /* Reload doesn't know about the flags register, and doesn't know that
16331 it doesn't want to clobber it. */
16332 gcc_assert (code == NOT);
16337 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16338 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16341 /* Fix up the destination if needed. */
16342 if (dst != operands[0])
16343 emit_move_insn (operands[0], dst);
16346 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16347 divisor are within the range [0-255]. */
16350 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16353 rtx end_label, qimode_label;
16354 rtx insn, div, mod;
16355 rtx scratch, tmp0, tmp1, tmp2;
16356 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16357 rtx (*gen_zero_extend) (rtx, rtx);
16358 rtx (*gen_test_ccno_1) (rtx, rtx);
16363 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16364 gen_test_ccno_1 = gen_testsi_ccno_1;
16365 gen_zero_extend = gen_zero_extendqisi2;
16368 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16369 gen_test_ccno_1 = gen_testdi_ccno_1;
16370 gen_zero_extend = gen_zero_extendqidi2;
16373 gcc_unreachable ();
16376 end_label = gen_label_rtx ();
16377 qimode_label = gen_label_rtx ();
16379 scratch = gen_reg_rtx (mode);
  /* Use 8bit unsigned divmod if dividend and divisor are within
16382 the range [0-255]. */
16383 emit_move_insn (scratch, operands[2]);
16384 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16385 scratch, 1, OPTAB_DIRECT);
16386 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16387 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16388 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16389 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16390 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16392 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16393 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16394 JUMP_LABEL (insn) = qimode_label;
  /* Generate original signed/unsigned divmod.  */
16397 div = gen_divmod4_1 (operands[0], operands[1],
16398 operands[2], operands[3]);
16401 /* Branch to the end. */
16402 emit_jump_insn (gen_jump (end_label));
16405 /* Generate 8bit unsigned divide. */
16406 emit_label (qimode_label);
16407 /* Don't use operands[0] for result of 8bit divide since not all
16408 registers support QImode ZERO_EXTRACT. */
16409 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16410 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16411 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16412 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16416 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16417 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16421 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16422 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16425 /* Extract remainder from AH. */
16426 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16427 if (REG_P (operands[1]))
16428 insn = emit_move_insn (operands[1], tmp1);
      /* Need a new scratch register since the old one has the result
	 of the 8bit divide.  */
16433 scratch = gen_reg_rtx (mode);
16434 emit_move_insn (scratch, tmp1);
16435 insn = emit_move_insn (operands[1], scratch);
16437 set_unique_reg_note (insn, REG_EQUAL, mod);
16439 /* Zero extend quotient from AL. */
16440 tmp1 = gen_lowpart (QImode, tmp0);
16441 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16442 set_unique_reg_note (insn, REG_EQUAL, div);
16444 emit_label (end_label);
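
/* Worked example of the fast path above (values assumed): for
   200 / 7 both operands fit in [0-255], so the test of
   (200 | 7) == 207 against -0x100 sets ZF, the branch to
   qimode_label is taken, and one 8bit divide leaves quotient 28
   in AL and remainder 4 in AH -- instead of the full-width divide
   sequence.  */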
16447 #define LEA_MAX_STALL (3)
16448 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16450 /* Increase given DISTANCE in half-cycles according to
16451 dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to the next cycle if there is some dependency.  */
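
/* A minimal example of the accounting (distances assumed): with
   DISTANCE == 3 (an odd number of half-cycles) and a dependency
   between PREV and NEXT, the result is 3 + (3 & 1) + 2 == 6, i.e.
   the count is rounded up to the next full cycle; with no dependency
   the instructions may pair and the result is just 4.  */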
16455 static unsigned int
16456 increase_distance (rtx prev, rtx next, unsigned int distance)
16461 if (!prev || !next)
16462 return distance + (distance & 1) + 2;
16464 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
16465 return distance + 1;
16467 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16468 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16469 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
16470 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
16471 return distance + (distance & 1) + 2;
16473 return distance + 1;
16476 /* Function checks if instruction INSN defines register number
16477 REGNO1 or REGNO2. */
16480 insn_defines_reg (unsigned int regno1, unsigned int regno2,
16485 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
16486 if (DF_REF_REG_DEF_P (*def_rec)
16487 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16488 && (regno1 == DF_REF_REGNO (*def_rec)
16489 || regno2 == DF_REF_REGNO (*def_rec)))
16497 /* Function checks if instruction INSN uses register number
16498 REGNO as a part of address expression. */
16501 insn_uses_reg_mem (unsigned int regno, rtx insn)
16505 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
16506 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
16512 /* Search backward for non-agu definition of register number REGNO1
16513 or register number REGNO2 in basic block starting from instruction
16514 START up to head of basic block or instruction INSN.
16516 Function puts true value into *FOUND var if definition was found
16517 and false otherwise.
16519 Distance in half-cycles between START and found instruction or head
16520 of BB is added to DISTANCE and returned. */
16523 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16524 rtx insn, int distance,
16525 rtx start, bool *found)
16527 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16535 && distance < LEA_SEARCH_THRESHOLD)
16537 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16539 distance = increase_distance (prev, next, distance);
16540 if (insn_defines_reg (regno1, regno2, prev))
16542 if (recog_memoized (prev) < 0
16543 || get_attr_type (prev) != TYPE_LEA)
16552 if (prev == BB_HEAD (bb))
16555 prev = PREV_INSN (prev);
16561 /* Search backward for non-agu definition of register number REGNO1
16562 or register number REGNO2 in INSN's basic block until
16563 1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach a neighbour BB boundary, or
16565 3. Reach agu definition.
16566 Returns the distance between the non-agu definition point and INSN.
16567 If no definition point, returns -1. */
16570 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16573 basic_block bb = BLOCK_FOR_INSN (insn);
16575 bool found = false;
16577 if (insn != BB_HEAD (bb))
16578 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16579 distance, PREV_INSN (insn),
16582 if (!found && distance < LEA_SEARCH_THRESHOLD)
16586 bool simple_loop = false;
16588 FOR_EACH_EDGE (e, ei, bb->preds)
16591 simple_loop = true;
16596 distance = distance_non_agu_define_in_bb (regno1, regno2,
16598 BB_END (bb), &found);
16601 int shortest_dist = -1;
16602 bool found_in_bb = false;
16604 FOR_EACH_EDGE (e, ei, bb->preds)
16607 = distance_non_agu_define_in_bb (regno1, regno2,
16613 if (shortest_dist < 0)
16614 shortest_dist = bb_dist;
16615 else if (bb_dist > 0)
16616 shortest_dist = MIN (bb_dist, shortest_dist);
16622 distance = shortest_dist;
16626 /* get_attr_type may modify recog data. We want to make sure
16627 that recog data is valid for instruction INSN, on which
16628 distance_non_agu_define is called. INSN is unchanged here. */
16629 extract_insn_cached (insn);
16634 return distance >> 1;
16637 /* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in a memory address, added
   to DISTANCE.  Return -1 if REGNO is set.
   Put true value into *FOUND if register usage was found and
   false otherwise.
16643 Put true value into *REDEFINED if register redefinition was
16644 found and false otherwise. */
16647 distance_agu_use_in_bb (unsigned int regno,
16648 rtx insn, int distance, rtx start,
16649 bool *found, bool *redefined)
16651 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16656 *redefined = false;
16660 && distance < LEA_SEARCH_THRESHOLD)
16662 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	  distance = increase_distance (prev, next, distance);
16665 if (insn_uses_reg_mem (regno, next))
16667 /* Return DISTANCE if OP0 is used in memory
16668 address in NEXT. */
16673 if (insn_defines_reg (regno, INVALID_REGNUM, next))
16675 /* Return -1 if OP0 is set in NEXT. */
16683 if (next == BB_END (bb))
16686 next = NEXT_INSN (next);
16692 /* Return the distance between INSN and the next insn that uses
   register number REGNO0 in a memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
16697 distance_agu_use (unsigned int regno0, rtx insn)
16699 basic_block bb = BLOCK_FOR_INSN (insn);
16701 bool found = false;
16702 bool redefined = false;
16704 if (insn != BB_END (bb))
16705 distance = distance_agu_use_in_bb (regno0, insn, distance,
16707 &found, &redefined);
16709 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16713 bool simple_loop = false;
16715 FOR_EACH_EDGE (e, ei, bb->succs)
16718 simple_loop = true;
16723 distance = distance_agu_use_in_bb (regno0, insn,
16724 distance, BB_HEAD (bb),
16725 &found, &redefined);
16728 int shortest_dist = -1;
16729 bool found_in_bb = false;
16730 bool redefined_in_bb = false;
16732 FOR_EACH_EDGE (e, ei, bb->succs)
16735 = distance_agu_use_in_bb (regno0, insn,
16736 distance, BB_HEAD (e->dest),
16737 &found_in_bb, &redefined_in_bb);
16740 if (shortest_dist < 0)
16741 shortest_dist = bb_dist;
16742 else if (bb_dist > 0)
16743 shortest_dist = MIN (bb_dist, shortest_dist);
16749 distance = shortest_dist;
16753 if (!found || redefined)
16756 return distance >> 1;
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a choice between LEA and ADD.
   Negative value: ADD is preferred over LEA
   Zero: neutral
   Positive value: LEA is preferred over ADD.  */
16764 #define IX86_LEA_PRIORITY 0
/* Return true if using lea INSN has a performance advantage over a
   sequence of simpler instructions.  That instruction sequence has
   SPLIT_COST cycles higher latency than the lea itself.  */
16771 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
16772 unsigned int regno2, int split_cost)
16774 int dist_define, dist_use;
16776 dist_define = distance_non_agu_define (regno1, regno2, insn);
16777 dist_use = distance_agu_use (regno0, insn);
16779 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
      /* If there is no non-AGU operand definition, no AGU operand
	 usage and the split cost is 0, then both the lea and non-lea
	 variants have the same priority.  Currently we prefer lea
	 for 64 bit code and non-lea on 32 bit code.  */
16786 if (dist_use < 0 && split_cost == 0)
16787 return TARGET_64BIT || IX86_LEA_PRIORITY;
  /* With a longer definition distance, lea is preferable.
     Here we adjust the distance to take the splitting cost and
     lea priority into account.  */
16795 dist_define += split_cost + IX86_LEA_PRIORITY;
  /* If there is no use in a memory address then we just check
     that the split cost does not exceed the AGU stall.  */
16800 return dist_define >= LEA_MAX_STALL;
16802 /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with the shorter distance takes effect.  */
16804 return dist_define >= dist_use;
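
/* Worked example (numbers assumed): with LEA_MAX_STALL == 3 and
   IX86_LEA_PRIORITY == 0, a non-AGU definition 2 half-cycle steps
   back (dist_define == 2) plus a split cost of 1 gives an adjusted
   dist_define of 3; if the result feeds an address 2 steps ahead
   (dist_use == 2), then 3 >= 2 and the lea form wins.  */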
/* Return true if it is legal for INSN to clobber the flags register,
   and false otherwise.  */
16811 ix86_ok_to_clobber_flags (rtx insn)
16813 basic_block bb = BLOCK_FOR_INSN (insn);
16819 if (NONDEBUG_INSN_P (insn))
16821 for (use = DF_INSN_USES (insn); *use; use++)
16822 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
16825 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
16829 if (insn == BB_END (bb))
16832 insn = NEXT_INSN (insn);
  live = df_get_live_out (bb);
16836 return !REGNO_REG_SET_P (live, FLAGS_REG);
16839 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16840 move and add to avoid AGU stalls. */
16843 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
16845 unsigned int regno0 = true_regnum (operands[0]);
16846 unsigned int regno1 = true_regnum (operands[1]);
16847 unsigned int regno2 = true_regnum (operands[2]);
16849 /* Check if we need to optimize. */
16850 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16853 /* Check it is correct to split here. */
  if (!ix86_ok_to_clobber_flags (insn))
  /* We need to split only adds with a non-destructive
     destination operand.  */
16859 if (regno0 == regno1 || regno0 == regno2)
16862 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
/* Return true if we should emit a lea instruction instead of a mov
   instruction.  */
16869 ix86_use_lea_for_mov (rtx insn, rtx operands[])
16871 unsigned int regno0;
16872 unsigned int regno1;
16874 /* Check if we need to optimize. */
16875 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16878 /* Use lea for reg to reg moves only. */
16879 if (!REG_P (operands[0]) || !REG_P (operands[1]))
16882 regno0 = true_regnum (operands[0]);
16883 regno1 = true_regnum (operands[1]);
16885 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
16888 /* Return true if we need to split lea into a sequence of
16889 instructions to avoid AGU stalls. */
16892 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
  unsigned int regno0 = true_regnum (operands[0]);
16895 unsigned int regno1 = INVALID_REGNUM;
16896 unsigned int regno2 = INVALID_REGNUM;
16897 int split_cost = 0;
16898 struct ix86_address parts;
16901 /* FIXME: Handle zero-extended addresses. */
16902 if (GET_CODE (operands[1]) == ZERO_EXTEND
16903 || GET_CODE (operands[1]) == AND)
  /* Check if we need to optimize.  */
16907 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16910 /* Check it is correct to split here. */
  if (!ix86_ok_to_clobber_flags (insn))
16914 ok = ix86_decompose_address (operands[1], &parts);
16917 /* There should be at least two components in the address. */
16918 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
16919 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
  /* We should not split to an add if a non-legitimate PIC
     operand is used as the displacement.  */
16924 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16928 regno1 = true_regnum (parts.base);
16930 regno2 = true_regnum (parts.index);
  /* Compute how many cycles we will add to execution time
     if we split the lea into a sequence of instructions.  */
16934 if (parts.base || parts.index)
      /* Have to use a mov instruction if the non-destructive
	 destination form is used.  */
16938 if (regno1 != regno0 && regno2 != regno0)
16941 /* Have to add index to base if both exist. */
16942 if (parts.base && parts.index)
16945 /* Have to use shift and adds if scale is 2 or greater. */
16946 if (parts.scale > 1)
16948 if (regno0 != regno1)
16950 else if (regno2 == regno0)
16953 split_cost += parts.scale;
  /* Have to use an add instruction with an immediate if
     disp is nonzero.  */
16958 if (parts.disp && parts.disp != const0_rtx)
16961 /* Subtract the price of lea. */
16965 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
/* Emit the x86 binary operation CODE in mode MODE, where the first
   operand matches the destination.  The emitted RTX includes a clobber
   of FLAGS_REG.  */
16972 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
16977 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
16978 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16980 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
/* Split lea instructions into a sequence of instructions
   which are executed on the ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber the flags register
   at the lea position.  */
16989 ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
  unsigned int regno0 = true_regnum (operands[0]);
16992 unsigned int regno1 = INVALID_REGNUM;
16993 unsigned int regno2 = INVALID_REGNUM;
16994 struct ix86_address parts;
16998 ok = ix86_decompose_address (operands[1], &parts);
17003 if (GET_MODE (parts.base) != mode)
17004 parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
17005 regno1 = true_regnum (parts.base);
17010 if (GET_MODE (parts.index) != mode)
17011 parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
17012 regno2 = true_regnum (parts.index);
17015 if (parts.scale > 1)
17017 /* Case r1 = r1 + ... */
17018 if (regno1 == regno0)
	  /* If we have a case r1 = r1 + C * r1 then we should use
	     multiplication, which is very expensive.  Assume the cost
	     model is wrong if we have such a case here.  */
17024 gcc_assert (regno2 != regno0);
17026 for (adds = parts.scale; adds > 0; adds--)
17027 ix86_emit_binop (PLUS, mode, operands[0], parts.index);
17031 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17032 if (regno0 != regno2)
17033 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
17035 /* Use shift for scaling. */
17036 ix86_emit_binop (ASHIFT, mode, operands[0],
17037 GEN_INT (exact_log2 (parts.scale)));
17040 ix86_emit_binop (PLUS, mode, operands[0], parts.base);
17042 if (parts.disp && parts.disp != const0_rtx)
17043 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
17046 else if (!parts.base && !parts.index)
      gcc_assert (parts.disp);
17049 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
17055 if (regno0 != regno2)
17056 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
17058 else if (!parts.index)
17060 if (regno0 != regno1)
17061 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
17065 if (regno0 == regno1)
17067 else if (regno0 == regno2)
17071 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
17075 ix86_emit_binop (PLUS, mode, operands[0], tmp);
17078 if (parts.disp && parts.disp != const0_rtx)
17079 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
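
/* Illustrative result (registers assumed): splitting
   "leal 4(%ebx,%ecx,2), %eax" with this routine yields roughly

     movl  %ecx, %eax        ; move the index into the destination
     sall  $1, %eax          ; apply the scale as a shift
     addl  %ebx, %eax        ; add the base
     addl  $4, %eax          ; add the displacement

   all of which execute on the ALU rather than the AGU.  */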
/* Return true if it is ok to optimize an ADD operation to a LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For processors like ATOM, if the
   destination register of the LEA holds an actual address which will
   be used soon, LEA is better; otherwise ADD is better.  */
17090 ix86_lea_for_add_ok (rtx insn, rtx operands[])
17092 unsigned int regno0 = true_regnum (operands[0]);
17093 unsigned int regno1 = true_regnum (operands[1]);
17094 unsigned int regno2 = true_regnum (operands[2]);
  /* If a = b + c, (a!=b && a!=c), we must use the lea form.  */
17097 if (regno0 != regno1 && regno0 != regno2)
17100 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17103 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
/* Return true if the destination reg of SET_BODY is the shift count
   of USE_BODY.  */
17110 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
17116 /* Retrieve destination of SET_BODY. */
17117 switch (GET_CODE (set_body))
17120 set_dest = SET_DEST (set_body);
17121 if (!set_dest || !REG_P (set_dest))
17125 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
17126 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
17134 /* Retrieve shift count of USE_BODY. */
17135 switch (GET_CODE (use_body))
17138 shift_rtx = XEXP (use_body, 1);
17141 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
17142 if (ix86_dep_by_shift_count_body (set_body,
17143 XVECEXP (use_body, 0, i)))
17151 && (GET_CODE (shift_rtx) == ASHIFT
17152 || GET_CODE (shift_rtx) == LSHIFTRT
17153 || GET_CODE (shift_rtx) == ASHIFTRT
17154 || GET_CODE (shift_rtx) == ROTATE
17155 || GET_CODE (shift_rtx) == ROTATERT))
17157 rtx shift_count = XEXP (shift_rtx, 1);
17159 /* Return true if shift count is dest of SET_BODY. */
17160 if (REG_P (shift_count)
17161 && true_regnum (set_dest) == true_regnum (shift_count))
/* Return true if the destination reg of SET_INSN is the shift count
   of USE_INSN.  */
17172 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
17174 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
17175 PATTERN (use_insn));
17178 /* Return TRUE or FALSE depending on whether the unary operator meets the
17179 appropriate constraints. */
17182 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
17183 enum machine_mode mode ATTRIBUTE_UNUSED,
17184 rtx operands[2] ATTRIBUTE_UNUSED)
  /* If one of the operands is memory, source and destination must match.  */
17187 if ((MEM_P (operands[0])
17188 || MEM_P (operands[1]))
17189 && ! rtx_equal_p (operands[0], operands[1]))
17194 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17195 are ok, keeping in mind the possible movddup alternative. */
17198 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
17200 if (MEM_P (operands[0]))
17201 return rtx_equal_p (operands[0], operands[1 + high]);
17202 if (MEM_P (operands[1]) && MEM_P (operands[2]))
17203 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
17207 /* Post-reload splitter for converting an SF or DFmode value in an
17208 SSE register into an unsigned SImode. */
17211 ix86_split_convert_uns_si_sse (rtx operands[])
17213 enum machine_mode vecmode;
17214 rtx value, large, zero_or_two31, input, two31, x;
17216 large = operands[1];
17217 zero_or_two31 = operands[2];
17218 input = operands[3];
17219 two31 = operands[4];
17220 vecmode = GET_MODE (large);
17221 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
17223 /* Load up the value into the low element. We must ensure that the other
17224 elements are valid floats -- zero is the easiest such value. */
17227 if (vecmode == V4SFmode)
17228 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
17230 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
17234 input = gen_rtx_REG (vecmode, REGNO (input));
17235 emit_move_insn (value, CONST0_RTX (vecmode));
17236 if (vecmode == V4SFmode)
17237 emit_insn (gen_sse_movss (value, value, input));
17239 emit_insn (gen_sse2_movsd (value, value, input));
17242 emit_move_insn (large, two31);
17243 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
17245 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
17246 emit_insn (gen_rtx_SET (VOIDmode, large, x));
17248 x = gen_rtx_AND (vecmode, zero_or_two31, large);
17249 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
17251 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
17252 emit_insn (gen_rtx_SET (VOIDmode, value, x));
17254 large = gen_rtx_REG (V4SImode, REGNO (large));
17255 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
17257 x = gen_rtx_REG (V4SImode, REGNO (value));
17258 if (vecmode == V4SFmode)
17259 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
17261 emit_insn (gen_sse2_cvttpd2dq (x, value));
17264 emit_insn (gen_xorv4si3 (value, value, large));
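
/* Worked example of the lane arithmetic above (input assumed): for
   value == 3e9 (>= 2^31), LARGE becomes all-ones in that lane, so
   zero_or_two31 == 2^31 and value - 2^31 == 852516352 converts
   exactly with the signed cvtt instruction; LARGE shifted left by 31
   is then 0x80000000, and the final xor puts the 2^31 back, giving
   3000000000.  For lanes below 2^31 the mask is zero and everything
   reduces to a plain signed conversion.  */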
17267 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17268 Expects the 64-bit DImode to be supplied in a pair of integral
17269 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17270 -mfpmath=sse, !optimize_size only. */
17273 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
17275 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
17276 rtx int_xmm, fp_xmm;
17277 rtx biases, exponents;
17280 int_xmm = gen_reg_rtx (V4SImode);
17281 if (TARGET_INTER_UNIT_MOVES)
17282 emit_insn (gen_movdi_to_sse (int_xmm, input));
17283 else if (TARGET_SSE_SPLIT_REGS)
17285 emit_clobber (int_xmm);
17286 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
17290 x = gen_reg_rtx (V2DImode);
17291 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
17292 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
17295 x = gen_rtx_CONST_VECTOR (V4SImode,
17296 gen_rtvec (4, GEN_INT (0x43300000UL),
17297 GEN_INT (0x45300000UL),
17298 const0_rtx, const0_rtx));
17299 exponents = validize_mem (force_const_mem (V4SImode, x));
17301 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17302 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
17304 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17305 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17306 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17307 (0x1.0p84 + double(fp_value_hi_xmm)).
17308 Note these exponents differ by 32. */
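
  /* A concrete instance (input assumed): for the 64-bit integer
     0x0000001200000034, the low half pairs with 0x43300000 to form
     the double 0x1.0p52 + 0x34, and the high half pairs with
     0x45300000 to form 0x1.0p84 + 0x12 * 0x1.0p32; after the bias
     subtraction below, the two lanes hold exactly 0x34 and
     0x12 * 2^32, and their sum is the original unsigned value.  */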
17310 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
17312 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17313 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17314 real_ldexp (&bias_lo_rvt, &dconst1, 52);
17315 real_ldexp (&bias_hi_rvt, &dconst1, 84);
17316 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
17317 x = const_double_from_real_value (bias_hi_rvt, DFmode);
17318 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
17319 biases = validize_mem (force_const_mem (V2DFmode, biases));
17320 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
17322 /* Add the upper and lower DFmode values together. */
17324 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
17327 x = copy_to_mode_reg (V2DFmode, fp_xmm);
17328 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
17329 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
17332 ix86_expand_vector_extract (false, target, fp_xmm, 0);
17335 /* Not used, but eases macroization of patterns. */
17337 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
17338 rtx input ATTRIBUTE_UNUSED)
17340 gcc_unreachable ();
17343 /* Convert an unsigned SImode value into a DFmode. Only currently used
17344 for SSE, but applicable anywhere. */
17347 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
17349 REAL_VALUE_TYPE TWO31r;
17352 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
17353 NULL, 1, OPTAB_DIRECT);
17355 fp = gen_reg_rtx (DFmode);
17356 emit_insn (gen_floatsidf2 (fp, x));
17358 real_ldexp (&TWO31r, &dconst1, 31);
17359 x = const_double_from_real_value (TWO31r, DFmode);
17361 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
17363 emit_move_insn (target, x);
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
                           0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
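/* Added note on the expansion above: the value is recombined as
   (double) (int) hi * 0x1.0p32 + (double) (unsigned) lo.  E.g. for
   input -2 (hi = -1, lo = 0xfffffffe) this is
   -0x1.0p32 + 4294967294.0 == -2.0.  */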
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
                                NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
                                NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
                               0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
                               0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
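/* Added note: splitting at bit 16 keeps every intermediate step exact
   in SFmode -- both halves fit in 16 bits and the scale by 0x1.0p16
   is a power of two -- so the only rounding happens in the final
   addition, matching a correctly rounded unsigned conversion.  */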
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  enum machine_mode intmode = GET_MODE (val);
  enum machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;

  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
                                OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
                                NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
                                OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
                                OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   patterns can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just a signed conversion if < 0x1p31, and otherwise
   by subtracting 0x1p31 first and xoring in 0x80000000 from *XORP
   afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  enum machine_mode mode = GET_MODE (val);
  enum machine_mode scalarmode = GET_MODE_INNER (mode);
  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
                                0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
                                 gen_lowpart (intmode, tmp[0]),
                                 GEN_INT (31), NULL_RTX, 0,
                                 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
                                   gen_lowpart (intmode, tmp[0]),
                                   two31, NULL_RTX, 0,
                                   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
                              0, OPTAB_DIRECT);
}
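/* Added note on the helper above: TMP[0] is an all-ones lane mask for
   val >= 0x1.0p31.  Those lanes get 2^31 subtracted before the signed
   conversion, and *XORP later flips bit 31 of the converted integer
   to add the 2^31 back, e.g. 3e9 -> 3e9 - 2^31 -> 852516352
   (0x32d05e00) -> 0xb2d05e00 == 3000000000.  */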
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V32QImode: case V16QImode: case V16HImode: case V8HImode:
    case V8SImode: case V4SImode: case V4DImode: case V2DImode:
    case V8SFmode: case V4SFmode: case V4DFmode: case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
        RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */
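/* Added illustration: for DFmode the mask built below is the bit
   pattern 0x8000000000000000 (or its complement 0x7fffffffffffffff
   when INVERT), replicated across all lanes when VECT, ready to feed
   andps/andpd/xorps/xorpd-style sign-bit operations.  */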
static rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case SImode: case SFmode:
    case V4SImode: case V4SFmode: case V8SImode: case V8SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case DImode: case DFmode:
    case V2DImode: case V2DFmode: case V4DImode: case V4DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
        lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode: case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        {
          imode = TImode;
          lo = 0, hi = (HOST_WIDE_INT)1 << shift;
        }
      else
        {
          rtvec vec;

          imode = DImode;
          lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
          if (invert)
            {
              lo = ~lo, hi = ~hi;
              v = constm1_rtx;
            }
          else
            v = const0_rtx;

          mask = immed_double_const (lo, hi, imode);

          vec = gen_rtvec (2, v, mask);
          v = gen_rtx_CONST_VECTOR (V2DImode, vec);
          v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

          return v;
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
                                rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
        par = gen_rtvec (2, set, use);
      else
        {
          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
          par = gen_rtvec (3, set, use, clob);
        }
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}
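/* Added illustration: with SSE math a NEG becomes an XOR with the
   sign-bit mask built above (xorps/xorpd) and an ABS an AND with the
   inverted mask (andps/andpd).  The scalar parallel additionally
   clobbers FLAGS_REG so the insn may instead be split into
   integer-register code.  */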
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
        op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
        {
          if (op0 == CONST0_RTX (mode))
            op0 = CONST0_RTX (vmode);
          else
            {
              rtx v = ix86_build_const_vector (vmode, false, op0);

              op0 = force_reg (vmode, v);
            }
        }
      else if (op0 != CONST0_RTX (mode))
        op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_const;
      else
        copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_var;
      else
        copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
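/* Added note: both paths implement
   copysign (x, y) = (x & ~signmask) | (y & signmask).  The _const
   variant pre-computes |x|, so only the (y & signmask) | |x| half
   remains to be split; the _var variant needs both masks.  */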
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
        {
          x = gen_rtx_AND (vmode, scratch, mask);
        }
      else /* alternative 2,4 */
        {
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
          x = gen_rtx_AND (vmode, scratch, op1);
        }
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
        {
          dest = simplify_gen_subreg (vmode, dest, mode, 0);
          x = gen_rtx_AND (vmode, dest, nmask);
        }
      else /* alternative 3,4 */
        {
          gcc_assert (REGNO (nmask) == REGNO (dest));
          dest = nmask;
          op0 = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, op0);
        }
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes and whether the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode: case CCCmode: case CCOmode: case CCSmode:
      if (set_mode != req_mode)
        return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:                    /* ZF=0 */
    case NE:                    /* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:                   /* CF=0 */
    case LTU:                   /* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
          && rtx_equal_p (op1, XEXP (op0, 0)))
        return CCCmode;
      return CCmode;
    case GTU:                   /* CF=0 & ZF=0 */
    case LEU:                   /* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
          && rtx_equal_p (op1, XEXP (op0, 0)))
        return CCCmode;
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:                    /* SF=OF   or   SF=0 */
    case LT:                    /* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases Carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:                    /* ZF=0 & SF=OF */
    case LE:                    /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* strcmp patterns do (use flags) and combine may ask us for proper
         mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGOCmode;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}
/* Return a comparison we can do that is equivalent to
   swap_condition (code), except possibly for orderedness.
   But never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:                    /* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:                    /* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:                  /* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:                  /* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
/* Return the cost of comparison CODE using the best strategy for
   performance.  All following functions use the number of instructions
   as the cost metric.  In the future this should be tweaked to compute
   bytes for optimize_size and take into account the performance of
   various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE: case UNLT: case LTGT: case GT: case GE:
    case UNORDERED: case ORDERED: case UNEQ:
      arith_cost = 4;
      break;
    case LT: case NE: case EQ: case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE: case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || (op_mode == XFmode
              && ! (standard_80387_constant_p (op0) == 1
                    || standard_80387_constant_p (op1) == 1)
              && GET_CODE (op1) != FLOAT)
          || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (MEM_P (op0)
              && ! (standard_80387_constant_p (op1) == 0
                    || MEM_P (op1))))
        {
          enum rtx_code new_code = ix86_fp_swap_condition (code);
          if (new_code != UNKNOWN)
            {
              rtx tmp;
              tmp = op0, op0 = op1, op1 = tmp;
              code = new_code;
            }
        }

      if (!REG_P (op0))
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          int tmp = standard_80387_constant_p (op1);
          if (tmp == 0)
            op1 = validize_mem (force_const_mem (op_mode, op1));
          else if (tmp == 1)
            {
              if (TARGET_CMOVE)
                op1 = force_reg (op_mode, op1);
            }
          else
            op1 = force_reg (op_mode, op1);
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);

      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */
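      /* Added reference, x87 condition bits as they land in AH after
         fnstsw: C0 = 0x01, C2 = 0x04, C3 = 0x40.  A compare leaves
         C3 C2 C0 = 000 for >, 001 for <, 100 for =, and 111 for
         unordered, which is where the 0x45, 0x44, 0x40, 0x05 and 0x04
         constants below come from.  */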
      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT: case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT: case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
              code = NE;
            }
          break;
        case GE: case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
              code = NE;
            }
          break;
        case LE: case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ: case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE: case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;
        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;
        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
static rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode: case DFmode: case XFmode:
    case QImode: case HImode: case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
        goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;
        enum machine_mode submode;

        if (CONSTANT_P (op0) && !CONSTANT_P (op1))
          {
            tmp = op0, op0 = op1, op1 = tmp;
            code = swap_condition (code);
          }

        split_double_mode (mode, &op0, 1, lo+0, hi+0);
        split_double_mode (mode, &op1, 1, lo+1, hi+1);

        submode = mode == DImode ? SImode : DImode;

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_insn_for_size_p ()
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (submode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_expand_branch (code, tmp, const0_rtx, label);
            return;
          }
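        /* Added illustration: on a 32-bit target, "if (a == b)" on
           DImode operands becomes roughly

               xorl hi1, hi0 ; xorl lo1, lo0 ; orl hi0, lo0 ; je label

           one branch instead of two.  */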
        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  Similarly for low word -1 and
           less-or-equal-than or greater-than.  */

        if (CONST_INT_P (hi[1]))
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              if (lo[1] == const0_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            case LE: case LEU: case GT: case GTU:
              if (lo[1] == constm1_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = UNKNOWN; code2 = NE;  break;
          case NE:   code2 = UNKNOWN; break;

          default:
            gcc_unreachable ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        if (code1 != UNKNOWN)
          ix86_expand_branch (code1, hi[0], hi[1], label);
        if (code2 != UNKNOWN)
          ix86_expand_branch (code2, hi[0], hi[1], label2);

        ix86_expand_branch (code3, lo[0], lo[1], label);

        if (code2 != UNKNOWN)
          emit_label (label2);
        return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}
/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx condition;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
}
void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  following common codes never translate
         into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)
        return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          && !TARGET_IEEE_FP)
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = swap_condition (code);
        }

      /* Try to expand the comparison and verify that we end up with a
         carry flag based comparison.  This fails to be true only when
         we decide to expand the comparison using arithmetic, which is
         not a common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
          || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
        code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
        return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
        return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We still can swap operands but that
             would force loading of the constant into register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
            return false;
          code = (code == GTU ? GEU : LTU);
        }
      else
        {
          rtx tmp = op1;
          op1 = op0;
          op0 = tmp;
          code = (code == GTU ? LTU : GEU);
        }
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
        return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by using
         sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          if (!sign_bit_compare_p)
            {
              rtx flags;
              bool fpcmp = false;

              compare_code = GET_CODE (compare_op);

              flags = XEXP (compare_op, 0);

              if (GET_MODE (flags) == CCFPmode
                  || GET_MODE (flags) == CCFPUmode)
                {
                  fpcmp = true;
                  compare_code
                    = ix86_fp_compare_code_to_integer (compare_code);
                }

              /* To simplify rest of code, restrict to the GEU case.  */
              if (compare_code == LTU)
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
              else
                {
                  if (fpcmp)
                    PUT_CODE (compare_op,
                              reverse_condition_maybe_unordered
                              (GET_CODE (compare_op)));
                  else
                    PUT_CODE (compare_op,
                              reverse_condition (GET_CODE (compare_op)));
                }
              diff = ct - cf;

              if (reg_overlap_mentioned_p (out, op0)
                  || reg_overlap_mentioned_p (out, op1))
                tmp = gen_reg_rtx (mode);

              if (mode == DImode)
                emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
              else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
                                                 flags, compare_op));
            }
          else
            {
              if (code == GT || code == GE)
                code = reverse_condition (code);
              else
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  diff = ct - cf;
                }
              tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
            }

          if (diff == 1)
            {
              /* [addl dest, ct]  */
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              /* orl $ct, dest  */
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              /* notl dest; [addl dest, cf]  */
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (cf),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               *
               * Size 8 - 11.
               */
              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp,
                                            copy_rtx (tmp), 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         copy_rtx (tmp),
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return true;
        }
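      /* Added illustration: for "x = a < b ? ct : cf" (unsigned) the
         sequence built above is roughly

             cmpl  b, a
             sbbl  dest, dest        ; dest = a < b ? -1 : 0
             andl  $(ct - cf), dest
             addl  $cf, dest

         with the NOT/ADD/OR variants covering the special constant
         cases.  */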
      if (diff < 0)
        {
          enum machine_mode cmp_mode = GET_MODE (op0);
          HOST_WIDE_INT tmp;

          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;

          if (SCALAR_FLOAT_MODE_P (cmp_mode))
            {
              gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

              /* We may be reversing unordered compare to normal compare, that
                 is not valid in general (we may convert non-trapping condition
                 to trapping one), however on i386 we currently emit all
                 comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
          && CONST_INT_P (op1))
        {
          if (op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
          && GET_MODE (op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1 (if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return true;
            }
        }

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          && (mode != DImode
              || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

          nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get arithmetic done in the proper mode to match.  */
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (!rtx_equal_p (tmp, out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out),
                                        copy_rtx (tmp)));
            }
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }
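      /* Added illustration: for "x = flag ? 6 : 1" (diff == 5) this
         path emits roughly

             xorl  %eax, %eax
             cmpl  op1, op0
             setcc %al
             leal  1(%eax,%eax,4), %eax

         with no branch and no cmov.  */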
      /*
       * General case:                  Jumpful:
       *   xorl dest,dest               cmpl op1, op2
       *   cmpl op1, op2                movl ct, dest
       *   setcc dest                   jcc 1f
       *   decl dest                    movl cf, dest
       *   andl (cf-ct),dest            1:
       *   addl ct,dest
       *
       * Size 20.                       Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST (optimize_insn_for_speed_p (),
                          false) >= 2)
        {
          if (cf == 0)
            {
              enum machine_mode cmp_mode = GET_MODE (op0);

              cf = ct;
              ct = 0;

              if (SCALAR_FLOAT_MODE_P (cmp_mode))
                {
                  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

                  /* We may be reversing unordered compare to normal compare,
                     that is not valid in general (we may convert non-trapping
                     condition to trapping one), however on i386 we currently
                     emit all comparisons unordered.  */
                  code = reverse_condition_maybe_unordered (code);
                }
              else
                {
                  code = reverse_condition (code);
                  if (compare_code != UNKNOWN)
                    compare_code = reverse_condition (compare_code);
                }
            }

          if (compare_code != UNKNOWN)
            {
              /* notl op1       (if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                {
                  HOST_WIDE_INT tmp = cf;
                  cf = ct;
                  ct = tmp;
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                         constm1_rtx,
                                         copy_rtx (out), 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (ct)
            out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                       GEN_INT (ct),
                                       copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }
    }
  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few more things with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
        return false;

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else if (CONST_INT_P (operands[3]))
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else
        return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
          || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  return true;
}
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
                                  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT: case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
        break;
      /* We have no LTGT as an operator.  We could implement it with
         NE & ORDERED, but this requires an extra temporary.  It's
         not clear that it's worth it.  */
      return UNKNOWN;

    case LT: case LE: case UNGT: case UNGE:
      /* These are supported directly.  */
      break;

    case EQ: case NE: case UNORDERED: case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
        break;
      /* For commutative operators, try to canonicalize the destination
         operand to be first in the comparison - this helps reload to
         avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
        break;
      /* FALLTHRU */

    case GE: case GT: case UNLE: case UNLT:
      /* These are not supported directly before AVX, and furthermore
         ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
         comparison operands to transform into something that is
         supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}
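/* Added note: minps/maxps are not commutative -- for NaN operands
   (and for +0.0/-0.0) the hardware returns the second source operand
   -- which is why only the LT/UNGE shapes are recognized above and
   why the operands must not be interchanged.  */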
/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
                     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmp_mode = GET_MODE (cmp_op0);
  rtx x;

  cmp_op0 = force_reg (cmp_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_mode))
    cmp_op1 = force_reg (cmp_mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  if (cmp_mode != mode)
    {
      x = force_reg (cmp_mode, x);
      convert_move (dest, x, false);
    }
  else
    emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode)))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
        op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
                              gen_rtx_IF_THEN_ELSE (mode, cmp,
                                                    op_true,
                                                    op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

      if (!nonimmediate_operand (op_true, mode))
        op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
        {
        case V4SFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvps;
          break;
        case V2DFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvpd;
          break;
        case V16QImode: case V8HImode: case V4SImode: case V2DImode:
          if (TARGET_SSE4_1)
            {
              gen = gen_sse4_1_pblendvb;
              dest = gen_lowpart (V16QImode, dest);
              op_false = gen_lowpart (V16QImode, op_false);
              op_true = gen_lowpart (V16QImode, op_true);
              cmp = gen_lowpart (V16QImode, cmp);
            }
          break;
        case V8SFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvps256;
          break;
        case V4DFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvpd256;
          break;
        case V32QImode: case V16HImode: case V8SImode: case V4DImode:
          if (TARGET_AVX2)
            {
              gen = gen_avx2_pblendvb;
              dest = gen_lowpart (V32QImode, dest);
              op_false = gen_lowpart (V32QImode, op_false);
              op_true = gen_lowpart (V32QImode, op_true);
              cmp = gen_lowpart (V32QImode, cmp);
            }
          break;
        default:
          break;
        }

      if (gen != NULL)
        emit_insn (gen (dest, op_false, op_true, cmp));
      else
        {
          op_true = force_reg (mode, op_true);

          t2 = gen_reg_rtx (mode);
          if (optimize)
            t3 = gen_reg_rtx (mode);
          else
            t3 = dest;

          x = gen_rtx_AND (mode, op_true, cmp);
          emit_insn (gen_rtx_SET (VOIDmode, t2, x));

          x = gen_rtx_NOT (mode, cmp);
          x = gen_rtx_AND (mode, x, op_false);
          emit_insn (gen_rtx_SET (VOIDmode, t3, x));

          x = gen_rtx_IOR (mode, t3, t2);
          emit_insn (gen_rtx_SET (VOIDmode, dest, x));
        }
    }
}
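/* Added note: the fallback sequence above is the classic bit-select
   dest = (cmp & op_true) | (~cmp & op_false), i.e. pand/pandn/por,
   which SSE4.1 blendv and XOP vpcmov collapse into a single
   instruction.  */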
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (op1);
      if (cmode != mode)
        return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
        return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
                                     operands[2], operands[3]))
        return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
                                 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));

  return true;
}
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
        {
        case LTGT:
          temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = AND;
          break;
        case UNEQ:
          temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = IOR;
          break;
        default:
          gcc_unreachable ();
        }
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
                                 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
          || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
    {
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
      if (negop == CONST1_RTX (data_mode))
        {
          rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
                                         operands[0], 1, OPTAB_DIRECT);
          if (res != operands[0])
            emit_move_insn (operands[0], res);
          return true;
        }
      else if (GET_MODE_INNER (data_mode) != DImode
               && vector_all_ones_operand (negop, data_mode))
        {
          rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
                                         operands[0], 0, OPTAB_DIRECT);
          if (res != operands[0])
            emit_move_insn (operands[0], res);
          return true;
        }
    }
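  /* Added illustration: the block above turns, e.g. for V4SImode,
     "v < 0 ? -1 : 0" into a single psrad $31 and "v < 0 ? 1 : 0" into
     a single psrld $31, with GE handled by the swapped constant
     arms.  */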
  if (!nonimmediate_operand (cop1, mode))
    cop1 = force_reg (mode, cop1);
  if (!general_operand (operands[1], data_mode))
    operands[1] = force_reg (data_mode, operands[1]);
  if (!general_operand (operands[2], data_mode))
    operands[2] = force_reg (data_mode, operands[2]);

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  if (TARGET_XOP
      && (mode == V16QImode || mode == V8HImode
          || mode == V4SImode || mode == V2DImode))
    ;
  else
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
        {
        case EQ: case GT: case GTU:
          break;

        case NE: case LE: case LEU:
          code = reverse_condition (code);
          negate = true;
          break;

        case GE: case GEU:
          code = reverse_condition (code);
          negate = true;
          /* FALLTHRU */

        case LT: case LTU:
          code = swap_condition (code);
          x = cop0, cop0 = cop1, cop1 = x;
          break;

        default:
          gcc_unreachable ();
        }

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
        {
          switch (code)
            {
            case EQ:
              /* SSE4.1 supports EQ.  */
              if (!TARGET_SSE4_1)
                return false;
              break;

            case GT: case GTU:
              /* SSE4.2 supports GT/GTU.  */
              if (!TARGET_SSE4_2)
                return false;
              break;

            default:
              gcc_unreachable ();
            }
        }

      /* Unsigned parallel compare is not supported by the hardware.
         Play some tricks to turn this into a signed comparison
         against zero.  */
      if (code == GTU)
        {
          cop0 = force_reg (mode, cop0);

          switch (mode)
            {
            case V8SImode: case V4DImode: case V4SImode: case V2DImode:
              {
                rtx t1, t2, mask;
                rtx (*gen_sub3) (rtx, rtx, rtx);

                switch (mode)
                  {
                  case V8SImode: gen_sub3 = gen_subv8si3; break;
                  case V4DImode: gen_sub3 = gen_subv4di3; break;
                  case V4SImode: gen_sub3 = gen_subv4si3; break;
                  case V2DImode: gen_sub3 = gen_subv2di3; break;
                  default:
                    gcc_unreachable ();
                  }
                /* Subtract (-(INT MAX) - 1) from both operands to make
                   them signed.  */
                mask = ix86_build_signbit_mask (mode, true, false);
                t1 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t1, cop0, mask));

                t2 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t2, cop1, mask));

                cop0 = t1;
                cop1 = t2;
                code = GT;
              }
              break;

            case V32QImode: case V16HImode: case V16QImode: case V8HImode:
              /* Perform a parallel unsigned saturating subtraction.  */
              x = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, x,
                                      gen_rtx_US_MINUS (mode, cop0, cop1)));
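              /* Added note: this uses the identity
                 (a > b unsigned) == ((a -us b) != 0); the saturating
                 subtract is zero exactly when a <= b, so the EQ
                 compare below combined with the negate flag yields
                 GTU.  */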
              cop0 = x;
              cop1 = CONST0_RTX (mode);
              code = EQ;
              negate = !negate;
              break;

            default:
              gcc_unreachable ();
            }
        }
    }

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
                               code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
      x = gen_lowpart (data_mode, x);
    }

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);
  return true;
}
/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, vt, vt2, vec[32];
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 32);

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
        {
          /* Unfortunately, the VPERMQ and VPERMPD instructions only support
             a constant shuffle operand.  With a tiny bit of effort we can
             use VPERMD instead.  A re-interpretation stall for V4DFmode is
             unfortunate but there's no avoiding it.
             Similarly for V16HImode we don't have instructions for variable
             shuffling, while for V32QImode we can use vpshufb; vpshufb;
             vpermq; vpor after preparing suitable masks.  */
          if (mode == V16HImode)
            {
              maskmode = mode = V32QImode;
              w = 32;
              e = 1;
            }
          else
            {
              maskmode = mode = V8SImode;
              w = 8;
              e = 4;
            }
          t1 = gen_reg_rtx (maskmode);

          /* Replicate the low bits of the V4DImode mask into V8SImode:
               mask = { A B C D }
               t1 = { A A B B C C D D }.  */
          for (i = 0; i < w / 2; ++i)
            vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = force_reg (maskmode, vt);
          mask = gen_lowpart (maskmode, mask);
          if (maskmode == V8SImode)
            emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
          else
            emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

          /* Multiply the shuffle indices by two.  */
          t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
                                    OPTAB_DIRECT);

          /* Add one to the odd shuffle indices:
               t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
          for (i = 0; i < w / 2; ++i)
            {
              vec[i * 2] = const0_rtx;
              vec[i * 2 + 1] = const1_rtx;
            }
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = force_const_mem (maskmode, vt);
          t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
                                    OPTAB_DIRECT);
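          /* Added worked example: a V4DImode mask { 1, 0, 3, 2 } has
             by now been expanded to the V8SImode mask
             { 2, 3, 0, 1, 6, 7, 4, 5 }, which selects the same qwords
             as dword pairs for VPERMD.  */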
19959 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
19960 operands[3] = mask = t1;
19961 target = gen_lowpart (mode, target);
19962 op0 = gen_lowpart (mode, op0);
19963 op1 = gen_lowpart (mode, op1);
19969 /* The VPERMD and VPERMPS instructions already properly ignore
19970 the high bits of the shuffle elements. No need for us to
19971 perform an AND ourselves. */
19972 if (one_operand_shuffle)
19973 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
19976 t1 = gen_reg_rtx (V8SImode);
19977 t2 = gen_reg_rtx (V8SImode);
19978 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
19979 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
19985 mask = gen_lowpart (V8SFmode, mask);
19986 if (one_operand_shuffle)
19987 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
19990 t1 = gen_reg_rtx (V8SFmode);
19991 t2 = gen_reg_rtx (V8SFmode);
19992 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
19993 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
19999 /* By combining the two 128-bit input vectors into one 256-bit
20000 input vector, we can use VPERMD and VPERMPS for the full
20001 two-operand shuffle. */
20002 t1 = gen_reg_rtx (V8SImode);
20003 t2 = gen_reg_rtx (V8SImode);
20004 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
20005 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
20006 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
20007 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
20011 t1 = gen_reg_rtx (V8SFmode);
20012 t2 = gen_reg_rtx (V8SImode);
20013 mask = gen_lowpart (V4SImode, mask);
20014 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
20015 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
20016 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
20017 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
20021 t1 = gen_reg_rtx (V32QImode);
20022 t2 = gen_reg_rtx (V32QImode);
20023 t3 = gen_reg_rtx (V32QImode);
20024 vt2 = GEN_INT (128);
20025 for (i = 0; i < 32; i++)
20027 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
20028 vt = force_reg (V32QImode, vt);
20029 for (i = 0; i < 32; i++)
20030 vec[i] = i < 16 ? vt2 : const0_rtx;
20031 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
20032 vt2 = force_reg (V32QImode, vt2);
20033 /* From mask create two adjusted masks, which contain the same
20034 bits as mask in the low 7 bits of each vector element.
20035 The first mask will have the most significant bit clear
20036 if it requests an element from the same 128-bit lane
20037 and MSB set if it requests an element from the other 128-bit lane.
20038 The second mask will have the opposite values of the MSB,
20039 and additionally will have its 128-bit lanes swapped.
20040 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20041 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20042 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
20043 stands for the other 12 bytes. */
20044 /* The bit that tells whether an element is from the same lane or the
20045 other lane is bit 4, so shift it up by 3 to the MSB position. */
20046 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
20047 gen_lowpart (V4DImode, mask),
20049 /* Clear MSB bits from the mask just in case it had them set. */
20050 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
20051 /* After this t1 will have MSB set for elements from the same lane,
as the worked example above shows. */
20052 emit_insn (gen_xorv32qi3 (t1, t1, vt2));
20053 /* Clear bits other than MSB. */
20054 emit_insn (gen_andv32qi3 (t1, t1, vt));
20055 /* Or in the lower bits from mask into t3. */
20056 emit_insn (gen_iorv32qi3 (t3, t1, t2));
20057 /* And invert MSB bits in t1, so MSB is set for elements from the other
20058 lane. */
20059 emit_insn (gen_xorv32qi3 (t1, t1, vt));
20060 /* Swap 128-bit lanes in t3. */
20061 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20062 gen_lowpart (V4DImode, t3),
20063 const2_rtx, GEN_INT (3),
20064 const0_rtx, const1_rtx));
20065 /* And or in the lower bits from mask into t1. */
20066 emit_insn (gen_iorv32qi3 (t1, t1, t2));
20067 if (one_operand_shuffle)
20069 /* Each of these shuffles will put 0s in places where
20070 an element from the other 128-bit lane is needed; otherwise it
20071 will shuffle in the requested value. */
20072 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
20073 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
20074 /* For t3 the 128-bit lanes are swapped again. */
20075 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20076 gen_lowpart (V4DImode, t3),
20077 const2_rtx, GEN_INT (3),
20078 const0_rtx, const1_rtx));
20079 /* ORing both together produces the result. */
20080 emit_insn (gen_iorv32qi3 (target, t1, t3));
20084 t4 = gen_reg_rtx (V32QImode);
20085 /* Similar to the one_operand_shuffle code above, just repeated
20086 twice for each operand. The merge_two code below will merge the
20087 two results together. */
20088 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
20089 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
20090 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
20091 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
20092 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
20093 gen_lowpart (V4DImode, t4),
20094 const2_rtx, GEN_INT (3),
20095 const0_rtx, const1_rtx));
20096 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20097 gen_lowpart (V4DImode, t3),
20098 const2_rtx, GEN_INT (3),
20099 const0_rtx, const1_rtx));
20100 emit_insn (gen_iorv32qi3 (t4, t2, t4));
20101 emit_insn (gen_iorv32qi3 (t3, t1, t3));
20107 gcc_assert (GET_MODE_SIZE (mode) <= 16);
20114 /* The XOP VPPERM insn supports three inputs. By ignoring the
20115 one_operand_shuffle special case, we avoid creating another
20116 set of constant vectors in memory. */
20117 one_operand_shuffle = false;
20119 /* mask = mask & {2*w-1, ...} */
20120 vt = GEN_INT (2*w - 1);
20124 /* mask = mask & {w-1, ...} */
20125 vt = GEN_INT (w - 1);
20128 for (i = 0; i < w; i++)
20130 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20131 mask = expand_simple_binop (maskmode, AND, mask, vt,
20132 NULL_RTX, 0, OPTAB_DIRECT);
20134 /* For non-QImode operations, convert the word permutation control
20135 into a byte permutation control. */
20136 if (mode != V16QImode)
20138 mask = expand_simple_binop (maskmode, ASHIFT, mask,
20139 GEN_INT (exact_log2 (e)),
20140 NULL_RTX, 0, OPTAB_DIRECT);
20142 /* Convert mask to vector of chars. */
20143 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
20145 /* Replicate each of the input bytes into byte positions:
20146 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20147 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20148 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20149 for (i = 0; i < 16; ++i)
20150 vec[i] = GEN_INT (i/e * e);
20151 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20152 vt = force_const_mem (V16QImode, vt);
20154 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
20156 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
20158 /* Convert it into the byte positions by doing
20159 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20160 for (i = 0; i < 16; ++i)
20161 vec[i] = GEN_INT (i % e);
20162 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20163 vt = force_const_mem (V16QImode, vt);
20164 emit_insn (gen_addv16qi3 (mask, mask, vt));
20167 /* The actual shuffle operations all operate on V16QImode. */
20168 op0 = gen_lowpart (V16QImode, op0);
20169 op1 = gen_lowpart (V16QImode, op1);
20170 target = gen_lowpart (V16QImode, target);
20174 emit_insn (gen_xop_pperm (target, op0, op1, mask));
20176 else if (one_operand_shuffle)
20178 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
20185 /* Shuffle the two input vectors independently. */
20186 t1 = gen_reg_rtx (V16QImode);
20187 t2 = gen_reg_rtx (V16QImode);
20188 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
20189 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
20192 /* Then merge them together. The key is whether any given control
20193 element contained a bit set that indicates the second word. */
20194 mask = operands[3];
20196 if (maskmode == V2DImode && !TARGET_SSE4_1)
20198 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20199 more shuffle to convert the V2DI input mask into a V4SI
20200 input mask, at which point the masking that expand_int_vcond
20201 performs will work as desired. */
20202 rtx t3 = gen_reg_rtx (V4SImode);
20203 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
20204 const0_rtx, const0_rtx,
20205 const2_rtx, const2_rtx));
20207 maskmode = V4SImode;
20211 for (i = 0; i < w; i++)
20213 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20214 vt = force_reg (maskmode, vt);
20215 mask = expand_simple_binop (maskmode, AND, mask, vt,
20216 NULL_RTX, 0, OPTAB_DIRECT);
20218 xops[0] = gen_lowpart (mode, operands[0]);
20219 xops[1] = gen_lowpart (mode, t2);
20220 xops[2] = gen_lowpart (mode, t1);
20221 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
20224 ok = ix86_expand_int_vcond (xops);
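/* A purely illustrative, self-contained scalar model of the V32QImode
   one-operand path above; the model_* names are hypothetical, not GCC's.
   vpshufb zeroes a result byte when bit 7 of its control byte is set and
   otherwise indexes only within the 16-byte lane of the result byte, so
   the full cross-lane shuffle is the IOR of a same-lane lookup (t1) and
   a lane-swapped other-lane lookup (t3).  */
static unsigned char
model_vpshufb_byte (const unsigned char op[32], int i, unsigned char ctl)
{
  return (ctl & 0x80) ? 0 : op[(i & 16) | (ctl & 15)];
}

static void
model_one_operand_v32qi (const unsigned char op[32],
			 const unsigned char sel[32],
			 unsigned char out[32])
{
  int i;
  for (i = 0; i < 32; i++)
    {
      int lane = i & 16;
      int same_lane = (sel[i] & 16) == lane;
      unsigned char t1 = (sel[i] & 15) | (same_lane ? 0 : 0x80);
      unsigned char t3 = (sel[i] & 15) | (same_lane ? 0x80 : 0);
      /* The lane swap of the t3 result is modelled by looking the byte
	 up in the opposite lane (i ^ 16).  */
      out[i] = model_vpshufb_byte (op, i, t1)
	       | model_vpshufb_byte (op, i ^ 16, t3);
    }
}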
20229 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20230 true if we should do zero extension, else sign extension. HIGH_P is
20231 true if we want the N/2 high elements, else the low elements. */
20234 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
20236 enum machine_mode imode = GET_MODE (operands[1]);
20241 rtx (*unpack)(rtx, rtx);
20242 rtx (*extract)(rtx, rtx) = NULL;
20243 enum machine_mode halfmode = BLKmode;
20249 unpack = gen_avx2_zero_extendv16qiv16hi2;
20251 unpack = gen_avx2_sign_extendv16qiv16hi2;
20252 halfmode = V16QImode;
20254 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
20258 unpack = gen_avx2_zero_extendv8hiv8si2;
20260 unpack = gen_avx2_sign_extendv8hiv8si2;
20261 halfmode = V8HImode;
20263 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
20267 unpack = gen_avx2_zero_extendv4siv4di2;
20269 unpack = gen_avx2_sign_extendv4siv4di2;
20270 halfmode = V4SImode;
20272 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
20276 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
20278 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
20282 unpack = gen_sse4_1_zero_extendv4hiv4si2;
20284 unpack = gen_sse4_1_sign_extendv4hiv4si2;
20288 unpack = gen_sse4_1_zero_extendv2siv2di2;
20290 unpack = gen_sse4_1_sign_extendv2siv2di2;
20293 gcc_unreachable ();
20296 if (GET_MODE_SIZE (imode) == 32)
20298 tmp = gen_reg_rtx (halfmode);
20299 emit_insn (extract (tmp, operands[1]));
20303 /* Shift the higher 8 bytes into the lower 8 bytes. */
20304 tmp = gen_reg_rtx (imode);
20305 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
20306 gen_lowpart (V1TImode, operands[1]),
20312 emit_insn (unpack (operands[0], tmp));
20316 rtx (*unpack)(rtx, rtx, rtx);
20322 unpack = gen_vec_interleave_highv16qi;
20324 unpack = gen_vec_interleave_lowv16qi;
20328 unpack = gen_vec_interleave_highv8hi;
20330 unpack = gen_vec_interleave_lowv8hi;
20334 unpack = gen_vec_interleave_highv4si;
20336 unpack = gen_vec_interleave_lowv4si;
20339 gcc_unreachable ();
20342 dest = gen_lowpart (imode, operands[0]);
20345 tmp = force_reg (imode, CONST0_RTX (imode));
20347 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
20348 operands[1], pc_rtx, pc_rtx);
20350 emit_insn (unpack (dest, operands[1], tmp));
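/* An illustrative scalar model (not emitted anywhere) of the pre-SSE4.1
   fallback above, for the V8HImode low-half case: widening is an
   interleave of the input with either zeros (zero extension) or the
   lanewise "0 > x" mask computed by ix86_expand_sse_cmp, which
   replicates the sign bit (sign extension).  */
static void
model_unpack_lo_v8hi (const short in[8], unsigned int out[4], int unsigned_p)
{
  int i;
  for (i = 0; i < 4; i++)
    {
      unsigned int ext = (!unsigned_p && in[i] < 0) ? 0xffffU : 0;
      /* Little-endian interleave of value word and extension word.  */
      out[i] = (unsigned short) in[i] | (ext << 16);
    }
}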
20354 /* Expand conditional increment or decrement using adc/sbb instructions.
20355 The default case using setcc followed by the conditional move can be
20356 done by generic code. */
20358 ix86_expand_int_addcc (rtx operands[])
20360 enum rtx_code code = GET_CODE (operands[1]);
20362 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
20364 rtx val = const0_rtx;
20365 bool fpcmp = false;
20366 enum machine_mode mode;
20367 rtx op0 = XEXP (operands[1], 0);
20368 rtx op1 = XEXP (operands[1], 1);
20370 if (operands[3] != const1_rtx
20371 && operands[3] != constm1_rtx)
20373 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20375 code = GET_CODE (compare_op);
20377 flags = XEXP (compare_op, 0);
20379 if (GET_MODE (flags) == CCFPmode
20380 || GET_MODE (flags) == CCFPUmode)
20383 code = ix86_fp_compare_code_to_integer (code);
20390 PUT_CODE (compare_op,
20391 reverse_condition_maybe_unordered
20392 (GET_CODE (compare_op)));
20394 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
20397 mode = GET_MODE (operands[0]);
20399 /* Construct either adc or sbb insn. */
20400 if ((code == LTU) == (operands[3] == constm1_rtx))
20405 insn = gen_subqi3_carry;
20408 insn = gen_subhi3_carry;
20411 insn = gen_subsi3_carry;
20414 insn = gen_subdi3_carry;
20417 gcc_unreachable ();
20425 insn = gen_addqi3_carry;
20428 insn = gen_addhi3_carry;
20431 insn = gen_addsi3_carry;
20434 insn = gen_adddi3_carry;
20437 gcc_unreachable ();
20440 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
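/* A minimal C model of the transformation above, with hypothetical
   names: after a compare that leaves the condition in the carry flag,
   a conditional increment is a single ADC with a zero immediate and a
   conditional decrement a single SBB, with no branch and no cmove.  */
static unsigned int
model_cond_addcc (unsigned int a, unsigned int x, unsigned int y,
		  int decrement)
{
  unsigned int cf = x < y;	/* cmp: LTU ends up in the carry flag */
  return decrement ? a - cf	/* sbb $0, a */
		   : a + cf;	/* adc $0, a */
}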
20446 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20447 but works for floating point parameters and non-offsettable memories.
20448 For pushes, it returns just stack offsets; the values will be saved
20449 in the right order. At most four parts are generated.
20452 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
20457 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
20459 size = (GET_MODE_SIZE (mode) + 4) / 8;
20461 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
20462 gcc_assert (size >= 2 && size <= 4);
20464 /* Optimize constant pool reference to immediates. This is used by fp
20465 moves, which force all constants to memory to allow combining. */
20466 if (MEM_P (operand) && MEM_READONLY_P (operand))
20468 rtx tmp = maybe_get_pool_constant (operand);
20473 if (MEM_P (operand) && !offsettable_memref_p (operand))
20475 /* The only non-offsettable memories we handle are pushes. */
20476 int ok = push_operand (operand, VOIDmode);
20480 operand = copy_rtx (operand);
20481 PUT_MODE (operand, Pmode);
20482 parts[0] = parts[1] = parts[2] = parts[3] = operand;
20486 if (GET_CODE (operand) == CONST_VECTOR)
20488 enum machine_mode imode = int_mode_for_mode (mode);
20489 /* Caution: if we looked through a constant pool memory above,
20490 the operand may actually have a different mode now. That's
20491 ok, since we want to pun this all the way back to an integer. */
20492 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
20493 gcc_assert (operand != NULL);
20499 if (mode == DImode)
20500 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20505 if (REG_P (operand))
20507 gcc_assert (reload_completed);
20508 for (i = 0; i < size; i++)
20509 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
20511 else if (offsettable_memref_p (operand))
20513 operand = adjust_address (operand, SImode, 0);
20514 parts[0] = operand;
20515 for (i = 1; i < size; i++)
20516 parts[i] = adjust_address (operand, SImode, 4 * i);
20518 else if (GET_CODE (operand) == CONST_DOUBLE)
20523 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20527 real_to_target (l, &r, mode);
20528 parts[3] = gen_int_mode (l[3], SImode);
20529 parts[2] = gen_int_mode (l[2], SImode);
20532 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
20533 parts[2] = gen_int_mode (l[2], SImode);
20536 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
20539 gcc_unreachable ();
20541 parts[1] = gen_int_mode (l[1], SImode);
20542 parts[0] = gen_int_mode (l[0], SImode);
20545 gcc_unreachable ();
20550 if (mode == TImode)
20551 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20552 if (mode == XFmode || mode == TFmode)
20554 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
20555 if (REG_P (operand))
20557 gcc_assert (reload_completed);
20558 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
20559 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
20561 else if (offsettable_memref_p (operand))
20563 operand = adjust_address (operand, DImode, 0);
20564 parts[0] = operand;
20565 parts[1] = adjust_address (operand, upper_mode, 8);
20567 else if (GET_CODE (operand) == CONST_DOUBLE)
20572 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20573 real_to_target (l, &r, mode);
20575 /* Do not use shift by 32 to avoid warning on 32bit systems. */
20576 if (HOST_BITS_PER_WIDE_INT >= 64)
20579 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
20580 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
20583 parts[0] = immed_double_const (l[0], l[1], DImode);
20585 if (upper_mode == SImode)
20586 parts[1] = gen_int_mode (l[2], SImode);
20587 else if (HOST_BITS_PER_WIDE_INT >= 64)
20590 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
20591 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
20594 parts[1] = immed_double_const (l[2], l[3], DImode);
20597 gcc_unreachable ();
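/* For illustration only (a hypothetical helper, not part of GCC): on a
   32-bit target an XFmode value in offsettable memory is split into
   three SImode parts at byte offsets 0, 4 and 8, which a little-endian
   model can read back as below.  */
static void
model_xf_parts (const unsigned char *p, unsigned int parts[3])
{
  int i;
  for (i = 0; i < 3; i++)
    parts[i] = (unsigned int) p[4 * i]
	       | ((unsigned int) p[4 * i + 1] << 8)
	       | ((unsigned int) p[4 * i + 2] << 16)
	       | ((unsigned int) p[4 * i + 3] << 24);
}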
20604 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
20605 Return false when normal moves are needed; true when all required
20606 insns have been emitted. Operands 2-5 contain the output values
20607 in the correct order; operands 6-9 contain the input values. */
20610 ix86_split_long_move (rtx operands[])
20615 int collisions = 0;
20616 enum machine_mode mode = GET_MODE (operands[0]);
20617 bool collisionparts[4];
20619 /* The DFmode expanders may ask us to move a double.
20620 For a 64bit target this is a single move. By hiding the fact
20621 here we simplify i386.md splitters. */
20622 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
20624 /* Optimize constant pool reference to immediates. This is used by
20625 fp moves, which force all constants to memory to allow combining. */
20627 if (MEM_P (operands[1])
20628 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
20629 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
20630 operands[1] = get_pool_constant (XEXP (operands[1], 0));
20631 if (push_operand (operands[0], VOIDmode))
20633 operands[0] = copy_rtx (operands[0]);
20634 PUT_MODE (operands[0], Pmode);
20637 operands[0] = gen_lowpart (DImode, operands[0]);
20638 operands[1] = gen_lowpart (DImode, operands[1]);
20639 emit_move_insn (operands[0], operands[1]);
20643 /* The only non-offsettable memory we handle is push. */
20644 if (push_operand (operands[0], VOIDmode))
20647 gcc_assert (!MEM_P (operands[0])
20648 || offsettable_memref_p (operands[0]));
20650 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
20651 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
20653 /* When emitting a push, take care with source operands on the stack. */
20654 if (push && MEM_P (operands[1])
20655 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
20657 rtx src_base = XEXP (part[1][nparts - 1], 0);
20659 /* Compensate for the stack decrement by 4. */
20660 if (!TARGET_64BIT && nparts == 3
20661 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
20662 src_base = plus_constant (src_base, 4);
20664 /* src_base refers to the stack pointer and is
20665 automatically decremented by each emitted push. */
20666 for (i = 0; i < nparts; i++)
20667 part[1][i] = change_address (part[1][i],
20668 GET_MODE (part[1][i]), src_base);
20671 /* We need to do the copy in the right order in case an address register
20672 of the source overlaps the destination. */
20673 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
20677 for (i = 0; i < nparts; i++)
20680 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
20681 if (collisionparts[i])
20685 /* Collision in the middle part can be handled by reordering. */
20686 if (collisions == 1 && nparts == 3 && collisionparts [1])
20688 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20689 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20691 else if (collisions == 1
20693 && (collisionparts [1] || collisionparts [2]))
20695 if (collisionparts [1])
20697 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20698 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20702 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
20703 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
20707 /* If there are more collisions, we can't handle them by reordering.
20708 Do an lea to the last part and use only one colliding move. */
20709 else if (collisions > 1)
20715 base = part[0][nparts - 1];
20717 /* Handle the case when the last part isn't valid for lea.
20718 This happens in 64-bit mode when storing the 12-byte XFmode. */
20719 if (GET_MODE (base) != Pmode)
20720 base = gen_rtx_REG (Pmode, REGNO (base));
20722 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
20723 part[1][0] = replace_equiv_address (part[1][0], base);
20724 for (i = 1; i < nparts; i++)
20726 tmp = plus_constant (base, UNITS_PER_WORD * i);
20727 part[1][i] = replace_equiv_address (part[1][i], tmp);
20738 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
20739 emit_insn (gen_addsi3 (stack_pointer_rtx,
20740 stack_pointer_rtx, GEN_INT (-4)));
20741 emit_move_insn (part[0][2], part[1][2]);
20743 else if (nparts == 4)
20745 emit_move_insn (part[0][3], part[1][3]);
20746 emit_move_insn (part[0][2], part[1][2]);
20751 /* In 64bit mode we don't have a 32bit push available. If this is a
20752 register, that is OK - we will just use the larger counterpart. We also
20753 retype the memory - these accesses come from an attempt to avoid a REX
20754 prefix on moving the second half of a TFmode value. */
20755 if (GET_MODE (part[1][1]) == SImode)
20757 switch (GET_CODE (part[1][1]))
20760 part[1][1] = adjust_address (part[1][1], DImode, 0);
20764 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
20768 gcc_unreachable ();
20771 if (GET_MODE (part[1][0]) == SImode)
20772 part[1][0] = part[1][1];
20775 emit_move_insn (part[0][1], part[1][1]);
20776 emit_move_insn (part[0][0], part[1][0]);
20780 /* Choose the correct order so we do not overwrite the source before it is copied. */
20781 if ((REG_P (part[0][0])
20782 && REG_P (part[1][1])
20783 && (REGNO (part[0][0]) == REGNO (part[1][1])
20785 && REGNO (part[0][0]) == REGNO (part[1][2]))
20787 && REGNO (part[0][0]) == REGNO (part[1][3]))))
20789 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
20791 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
20793 operands[2 + i] = part[0][j];
20794 operands[6 + i] = part[1][j];
20799 for (i = 0; i < nparts; i++)
20801 operands[2 + i] = part[0][i];
20802 operands[6 + i] = part[1][i];
20806 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
20807 if (optimize_insn_for_size_p ())
20809 for (j = 0; j < nparts - 1; j++)
20810 if (CONST_INT_P (operands[6 + j])
20811 && operands[6 + j] != const0_rtx
20812 && REG_P (operands[2 + j]))
20813 for (i = j; i < nparts - 1; i++)
20814 if (CONST_INT_P (operands[7 + i])
20815 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
20816 operands[7 + i] = operands[2 + j];
20819 for (i = 0; i < nparts; i++)
20820 emit_move_insn (operands[2 + i], operands[6 + i]);
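/* An illustrative sketch (ours, not GCC's) of the ordering rule above:
   when a destination part is also a register feeding later source
   parts, the part-wise moves are emitted high-to-low instead of
   low-to-high, so nothing is clobbered before it has been read.  */
static void
model_ordered_part_copy (unsigned int *dst, const unsigned int *src,
			 int nparts, int overlap)
{
  int i;
  if (overlap)
    for (i = nparts - 1; i >= 0; i--)	/* copy the high part first */
      dst[i] = src[i];
  else
    for (i = 0; i < nparts; i++)
      dst[i] = src[i];
}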
20825 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
20826 left shift by a constant, either using a single shift or
20827 a sequence of add instructions. */
20830 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
20832 rtx (*insn)(rtx, rtx, rtx);
20835 || (count * ix86_cost->add <= ix86_cost->shift_const
20836 && !optimize_insn_for_size_p ()))
20838 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
20839 while (count-- > 0)
20840 emit_insn (insn (operand, operand, operand));
20844 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20845 emit_insn (insn (operand, operand, GEN_INT (count)));
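/* What the add-based variant above computes, as a scalar sketch: each
   self-add doubles the operand, so COUNT adds equal a left shift by
   COUNT.  */
static unsigned int
model_ashl_by_adds (unsigned int x, int count)
{
  while (count-- > 0)
    x += x;			/* add x, x  ==  x <<= 1 */
  return x;
}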
20850 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
20852 rtx (*gen_ashl3)(rtx, rtx, rtx);
20853 rtx (*gen_shld)(rtx, rtx, rtx);
20854 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20856 rtx low[2], high[2];
20859 if (CONST_INT_P (operands[2]))
20861 split_double_mode (mode, operands, 2, low, high);
20862 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20864 if (count >= half_width)
20866 emit_move_insn (high[0], low[1]);
20867 emit_move_insn (low[0], const0_rtx);
20869 if (count > half_width)
20870 ix86_expand_ashl_const (high[0], count - half_width, mode);
20874 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20876 if (!rtx_equal_p (operands[0], operands[1]))
20877 emit_move_insn (operands[0], operands[1]);
20879 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
20880 ix86_expand_ashl_const (low[0], count, mode);
20885 split_double_mode (mode, operands, 1, low, high);
20887 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20889 if (operands[1] == const1_rtx)
20891 /* Assuming we've chosen QImode-capable registers, then 1 << N
20892 can be done with two 32/64-bit shifts, no branches, no cmoves. */
20893 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
20895 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
20897 ix86_expand_clear (low[0]);
20898 ix86_expand_clear (high[0]);
20899 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
20901 d = gen_lowpart (QImode, low[0]);
20902 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20903 s = gen_rtx_EQ (QImode, flags, const0_rtx);
20904 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20906 d = gen_lowpart (QImode, high[0]);
20907 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20908 s = gen_rtx_NE (QImode, flags, const0_rtx);
20909 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20912 /* Otherwise, we can get the same results by manually performing
20913 a bit extract operation on bit 5/6, and then performing the two
20914 shifts. The two methods of getting 0/1 into low/high are exactly
20915 the same size. Avoiding the shift in the bit extract case helps
20916 pentium4 a bit; no one else seems to care much either way. */
20919 enum machine_mode half_mode;
20920 rtx (*gen_lshr3)(rtx, rtx, rtx);
20921 rtx (*gen_and3)(rtx, rtx, rtx);
20922 rtx (*gen_xor3)(rtx, rtx, rtx);
20923 HOST_WIDE_INT bits;
20926 if (mode == DImode)
20928 half_mode = SImode;
20929 gen_lshr3 = gen_lshrsi3;
20930 gen_and3 = gen_andsi3;
20931 gen_xor3 = gen_xorsi3;
20936 half_mode = DImode;
20937 gen_lshr3 = gen_lshrdi3;
20938 gen_and3 = gen_anddi3;
20939 gen_xor3 = gen_xordi3;
20943 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
20944 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
20946 x = gen_lowpart (half_mode, operands[2]);
20947 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
20949 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
20950 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
20951 emit_move_insn (low[0], high[0]);
20952 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
20955 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20956 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
20960 if (operands[1] == constm1_rtx)
20962 /* For -1 << N, we can avoid the shld instruction, because we
20963 know that we're shifting 0...31/63 ones into a -1. */
20964 emit_move_insn (low[0], constm1_rtx);
20965 if (optimize_insn_for_size_p ())
20966 emit_move_insn (high[0], low[0]);
20968 emit_move_insn (high[0], constm1_rtx);
20972 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20974 if (!rtx_equal_p (operands[0], operands[1]))
20975 emit_move_insn (operands[0], operands[1]);
20977 split_double_mode (mode, operands, 1, low, high);
20978 emit_insn (gen_shld (high[0], low[0], operands[2]));
20981 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20983 if (TARGET_CMOVE && scratch)
20985 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20986 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20988 ix86_expand_clear (scratch);
20989 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
20993 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20994 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20996 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
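/* An illustrative 64-bit-via-32-bit left shift matching the constant
   split above (a sketch, not the emitted RTL); the count is reduced
   modulo the full width, just as the hardware does.  */
static void
model_double_ashl (unsigned int *lo, unsigned int *hi, int count)
{
  count &= 63;
  if (count >= 32)
    {
      *hi = *lo << (count - 32);	/* high = low, then shift high */
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));	/* shld */
      *lo <<= count;
    }
}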
21001 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
21003 rtx (*gen_ashr3)(rtx, rtx, rtx)
21004 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
21005 rtx (*gen_shrd)(rtx, rtx, rtx);
21006 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21008 rtx low[2], high[2];
21011 if (CONST_INT_P (operands[2]))
21013 split_double_mode (mode, operands, 2, low, high);
21014 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21016 if (count == GET_MODE_BITSIZE (mode) - 1)
21018 emit_move_insn (high[0], high[1]);
21019 emit_insn (gen_ashr3 (high[0], high[0],
21020 GEN_INT (half_width - 1)));
21021 emit_move_insn (low[0], high[0]);
21024 else if (count >= half_width)
21026 emit_move_insn (low[0], high[1]);
21027 emit_move_insn (high[0], low[0]);
21028 emit_insn (gen_ashr3 (high[0], high[0],
21029 GEN_INT (half_width - 1)));
21031 if (count > half_width)
21032 emit_insn (gen_ashr3 (low[0], low[0],
21033 GEN_INT (count - half_width)));
21037 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21039 if (!rtx_equal_p (operands[0], operands[1]))
21040 emit_move_insn (operands[0], operands[1]);
21042 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
21043 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
21048 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21050 if (!rtx_equal_p (operands[0], operands[1]))
21051 emit_move_insn (operands[0], operands[1]);
21053 split_double_mode (mode, operands, 1, low, high);
21055 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21056 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
21058 if (TARGET_CMOVE && scratch)
21060 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21061 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21063 emit_move_insn (scratch, high[0]);
21064 emit_insn (gen_ashr3 (scratch, scratch,
21065 GEN_INT (half_width - 1)));
21066 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21071 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
21072 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
21074 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
21080 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
21082 rtx (*gen_lshr3)(rtx, rtx, rtx)
21083 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
21084 rtx (*gen_shrd)(rtx, rtx, rtx);
21085 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21087 rtx low[2], high[2];
21090 if (CONST_INT_P (operands[2]))
21092 split_double_mode (mode, operands, 2, low, high);
21093 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21095 if (count >= half_width)
21097 emit_move_insn (low[0], high[1]);
21098 ix86_expand_clear (high[0]);
21100 if (count > half_width)
21101 emit_insn (gen_lshr3 (low[0], low[0],
21102 GEN_INT (count - half_width)));
21106 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21108 if (!rtx_equal_p (operands[0], operands[1]))
21109 emit_move_insn (operands[0], operands[1]);
21111 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
21112 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
21117 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21119 if (!rtx_equal_p (operands[0], operands[1]))
21120 emit_move_insn (operands[0], operands[1]);
21122 split_double_mode (mode, operands, 1, low, high);
21124 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21125 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
21127 if (TARGET_CMOVE && scratch)
21129 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21130 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21132 ix86_expand_clear (scratch);
21133 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21138 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
21139 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
21141 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
21146 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
21148 predict_jump (int prob)
21150 rtx insn = get_last_insn ();
21151 gcc_assert (JUMP_P (insn));
21152 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
21155 /* Helper function for the string operations below. Test whether VARIABLE
21156 is aligned to VALUE bytes. If true, jump to the label. */
21158 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
21160 rtx label = gen_label_rtx ();
21161 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
21162 if (GET_MODE (variable) == DImode)
21163 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
21165 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
21166 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
21169 predict_jump (REG_BR_PROB_BASE * 50 / 100);
21171 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21175 /* Adjust COUNTREG by VALUE. */
21177 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
21179 rtx (*gen_add)(rtx, rtx, rtx)
21180 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
21182 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
21185 /* Zero extend the possibly SImode EXP to a Pmode register. */
21187 ix86_zero_extend_to_Pmode (rtx exp)
21190 if (GET_MODE (exp) == VOIDmode)
21191 return force_reg (Pmode, exp);
21192 if (GET_MODE (exp) == Pmode)
21193 return copy_to_mode_reg (Pmode, exp);
21194 r = gen_reg_rtx (Pmode);
21195 emit_insn (gen_zero_extendsidi2 (r, exp));
21199 /* Divide COUNTREG by SCALE. */
21201 scale_counter (rtx countreg, int scale)
21207 if (CONST_INT_P (countreg))
21208 return GEN_INT (INTVAL (countreg) / scale);
21209 gcc_assert (REG_P (countreg));
21211 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
21212 GEN_INT (exact_log2 (scale)),
21213 NULL, 1, OPTAB_DIRECT);
21217 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21218 DImode for constant loop counts. */
21220 static enum machine_mode
21221 counter_mode (rtx count_exp)
21223 if (GET_MODE (count_exp) != VOIDmode)
21224 return GET_MODE (count_exp);
21225 if (!CONST_INT_P (count_exp))
21227 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
21232 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
21233 to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
21234 the overall size is COUNT, specified in bytes. When SRCPTR is NULL,
21235 output the equivalent loop to set memory to VALUE (supposed to be in MODE).
21237 The size is rounded down to a whole number of chunks moved at once.
21238 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
21242 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
21243 rtx destptr, rtx srcptr, rtx value,
21244 rtx count, enum machine_mode mode, int unroll,
21247 rtx out_label, top_label, iter, tmp;
21248 enum machine_mode iter_mode = counter_mode (count);
21249 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
21250 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
21256 top_label = gen_label_rtx ();
21257 out_label = gen_label_rtx ();
21258 iter = gen_reg_rtx (iter_mode);
21260 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
21261 NULL, 1, OPTAB_DIRECT);
21262 /* Those two should combine. */
21263 if (piece_size == const1_rtx)
21265 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
21267 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21269 emit_move_insn (iter, const0_rtx);
21271 emit_label (top_label);
21273 tmp = convert_modes (Pmode, iter_mode, iter, true);
21274 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
21275 destmem = change_address (destmem, mode, x_addr);
21279 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
21280 srcmem = change_address (srcmem, mode, y_addr);
21282 /* When unrolling for chips that reorder memory reads and writes,
21283 we can save registers by using a single temporary.
21284 Using 4 temporaries is also overkill in 32bit mode. */
21285 if (!TARGET_64BIT && 0)
21287 for (i = 0; i < unroll; i++)
21292 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21294 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21296 emit_move_insn (destmem, srcmem);
21302 gcc_assert (unroll <= 4);
21303 for (i = 0; i < unroll; i++)
21305 tmpreg[i] = gen_reg_rtx (mode);
21309 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21311 emit_move_insn (tmpreg[i], srcmem);
21313 for (i = 0; i < unroll; i++)
21318 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21320 emit_move_insn (destmem, tmpreg[i]);
21325 for (i = 0; i < unroll; i++)
21329 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21330 emit_move_insn (destmem, value);
21333 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
21334 true, OPTAB_LIB_WIDEN);
21336 emit_move_insn (iter, tmp);
21338 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
21340 if (expected_size != -1)
21342 expected_size /= GET_MODE_SIZE (mode) * unroll;
21343 if (expected_size == 0)
21345 else if (expected_size > REG_BR_PROB_BASE)
21346 predict_jump (REG_BR_PROB_BASE - 1);
21348 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
21351 predict_jump (REG_BR_PROB_BASE * 80 / 100);
21352 iter = ix86_zero_extend_to_Pmode (iter);
21353 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
21354 true, OPTAB_LIB_WIDEN);
21355 if (tmp != destptr)
21356 emit_move_insn (destptr, tmp);
21359 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
21360 true, OPTAB_LIB_WIDEN);
21362 emit_move_insn (srcptr, tmp);
21364 emit_label (out_label);
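/* A scalar sketch of the loop shape emitted above for the move case,
   with MODE taken as one 'unsigned int' chunk and UNROLL = 4
   (illustrative values only).  The size is rounded down to a whole
   number of unrolled chunks; all loads are issued before the stores,
   mirroring the four-temporary schedule used for chips that reorder
   memory operations.  */
static void
model_unrolled_copy (unsigned int *dst, const unsigned int *src,
		     unsigned long nwords)
{
  unsigned long i, rounded = nwords & ~(unsigned long) 3;
  for (i = 0; i < rounded; i += 4)
    {
      unsigned int t0 = src[i], t1 = src[i + 1];
      unsigned int t2 = src[i + 2], t3 = src[i + 3];
      dst[i] = t0;
      dst[i + 1] = t1;
      dst[i + 2] = t2;
      dst[i + 3] = t3;
    }
}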
21367 /* Output a "rep; mov" instruction.
21368 Arguments have the same meaning as for the previous function. */
21370 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
21371 rtx destptr, rtx srcptr,
21373 enum machine_mode mode)
21378 HOST_WIDE_INT rounded_count;
21380 /* If the size is known, it is shorter to use rep movs. */
21381 if (mode == QImode && CONST_INT_P (count)
21382 && !(INTVAL (count) & 3))
21385 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21386 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21387 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
21388 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
21389 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
21390 if (mode != QImode)
21392 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21393 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21394 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21395 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
21396 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21397 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
21401 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21402 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
21404 if (CONST_INT_P (count))
21406 rounded_count = (INTVAL (count)
21407 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21408 destmem = shallow_copy_rtx (destmem);
21409 srcmem = shallow_copy_rtx (srcmem);
21410 set_mem_size (destmem, rounded_count);
21411 set_mem_size (srcmem, rounded_count);
21415 if (MEM_SIZE_KNOWN_P (destmem))
21416 clear_mem_size (destmem);
21417 if (MEM_SIZE_KNOWN_P (srcmem))
21418 clear_mem_size (srcmem);
21420 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
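/* For mode == SImode the insn emitted above amounts to the sketch below
   (GNU C inline assembly, illustrative only): ecx/rcx holds the
   already-scaled dword count, and edi/rdi and esi/rsi advance as the
   instruction executes.  */
static void
model_rep_movsl (void *dst, const void *src, unsigned long dwords)
{
  __asm__ volatile ("rep movsl"
		    : "+D" (dst), "+S" (src), "+c" (dwords)
		    : : "memory");
}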
21424 /* Output a "rep; stos" instruction.
21425 Arguments have the same meaning as for the previous function. */
21427 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
21428 rtx count, enum machine_mode mode,
21433 HOST_WIDE_INT rounded_count;
21435 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21436 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21437 value = force_reg (mode, gen_lowpart (mode, value));
21438 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
21439 if (mode != QImode)
21441 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21442 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21443 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21446 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21447 if (orig_value == const0_rtx && CONST_INT_P (count))
21449 rounded_count = (INTVAL (count)
21450 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21451 destmem = shallow_copy_rtx (destmem);
21452 set_mem_size (destmem, rounded_count);
21454 else if (MEM_SIZE_KNOWN_P (destmem))
21455 clear_mem_size (destmem);
21456 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
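/* The matching illustrative sketch for the store case: eax holds the
   replicated value and ecx/rcx the scaled count.  */
static void
model_rep_stosl (void *dst, unsigned int value, unsigned long dwords)
{
  __asm__ volatile ("rep stosl"
		    : "+D" (dst), "+c" (dwords)
		    : "a" (value)
		    : "memory");
}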
21460 emit_strmov (rtx destmem, rtx srcmem,
21461 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
21463 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
21464 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
21465 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21468 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
21470 expand_movmem_epilogue (rtx destmem, rtx srcmem,
21471 rtx destptr, rtx srcptr, rtx count, int max_size)
21474 if (CONST_INT_P (count))
21476 HOST_WIDE_INT countval = INTVAL (count);
21479 if ((countval & 0x10) && max_size > 16)
21483 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
21484 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
21487 gcc_unreachable ();
21490 if ((countval & 0x08) && max_size > 8)
21493 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
21496 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21497 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
21501 if ((countval & 0x04) && max_size > 4)
21503 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21506 if ((countval & 0x02) && max_size > 2)
21508 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
21511 if ((countval & 0x01) && max_size > 1)
21513 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
21520 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
21521 count, 1, OPTAB_DIRECT);
21522 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
21523 count, QImode, 1, 4);
21527 /* When single stringop instructions are usable, we can cheaply
21528 advance the dest and src pointers. Otherwise we save code size by
21529 maintaining an offset (zero is readily available from the preceding
21530 rep operation) and using x86 addressing modes. */
21531 if (TARGET_SINGLE_STRINGOP)
21535 rtx label = ix86_expand_aligntest (count, 4, true);
21536 src = change_address (srcmem, SImode, srcptr);
21537 dest = change_address (destmem, SImode, destptr);
21538 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21539 emit_label (label);
21540 LABEL_NUSES (label) = 1;
21544 rtx label = ix86_expand_aligntest (count, 2, true);
21545 src = change_address (srcmem, HImode, srcptr);
21546 dest = change_address (destmem, HImode, destptr);
21547 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21548 emit_label (label);
21549 LABEL_NUSES (label) = 1;
21553 rtx label = ix86_expand_aligntest (count, 1, true);
21554 src = change_address (srcmem, QImode, srcptr);
21555 dest = change_address (destmem, QImode, destptr);
21556 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21557 emit_label (label);
21558 LABEL_NUSES (label) = 1;
21563 rtx offset = force_reg (Pmode, const0_rtx);
21568 rtx label = ix86_expand_aligntest (count, 4, true);
21569 src = change_address (srcmem, SImode, srcptr);
21570 dest = change_address (destmem, SImode, destptr);
21571 emit_move_insn (dest, src);
21572 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
21573 true, OPTAB_LIB_WIDEN);
21575 emit_move_insn (offset, tmp);
21576 emit_label (label);
21577 LABEL_NUSES (label) = 1;
21581 rtx label = ix86_expand_aligntest (count, 2, true);
21582 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21583 src = change_address (srcmem, HImode, tmp);
21584 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21585 dest = change_address (destmem, HImode, tmp);
21586 emit_move_insn (dest, src);
21587 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
21588 true, OPTAB_LIB_WIDEN);
21590 emit_move_insn (offset, tmp);
21591 emit_label (label);
21592 LABEL_NUSES (label) = 1;
21596 rtx label = ix86_expand_aligntest (count, 1, true);
21597 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21598 src = change_address (srcmem, QImode, tmp);
21599 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21600 dest = change_address (destmem, QImode, tmp);
21601 emit_move_insn (dest, src);
21602 emit_label (label);
21603 LABEL_NUSES (label) = 1;
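/* A scalar model of the constant-count epilogue above (illustrative
   only): each set bit of COUNT below the chunk size selects exactly
   one move of the corresponding power-of-two size, largest first.  */
static void
model_movmem_epilogue (unsigned char *dst, const unsigned char *src,
		       unsigned long count)
{
  unsigned long off = 0, n, j;
  for (n = 8; n >= 1; n >>= 1)
    if (count & n)
      {
	for (j = 0; j < n; j++)
	  dst[off + j] = src[off + j];
	off += n;
      }
}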
21608 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
21610 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
21611 rtx count, int max_size)
21614 expand_simple_binop (counter_mode (count), AND, count,
21615 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
21616 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
21617 gen_lowpart (QImode, value), count, QImode,
21621 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
21623 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
21627 if (CONST_INT_P (count))
21629 HOST_WIDE_INT countval = INTVAL (count);
21632 if ((countval & 0x10) && max_size > 16)
21636 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
21637 emit_insn (gen_strset (destptr, dest, value));
21638 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
21639 emit_insn (gen_strset (destptr, dest, value));
21642 gcc_unreachable ();
21645 if ((countval & 0x08) && max_size > 8)
21649 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
21650 emit_insn (gen_strset (destptr, dest, value));
21654 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
21655 emit_insn (gen_strset (destptr, dest, value));
21656 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
21657 emit_insn (gen_strset (destptr, dest, value));
21661 if ((countval & 0x04) && max_size > 4)
21663 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
21664 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21667 if ((countval & 0x02) && max_size > 2)
21669 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
21670 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21673 if ((countval & 0x01) && max_size > 1)
21675 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
21676 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21683 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
21688 rtx label = ix86_expand_aligntest (count, 16, true);
21691 dest = change_address (destmem, DImode, destptr);
21692 emit_insn (gen_strset (destptr, dest, value));
21693 emit_insn (gen_strset (destptr, dest, value));
21697 dest = change_address (destmem, SImode, destptr);
21698 emit_insn (gen_strset (destptr, dest, value));
21699 emit_insn (gen_strset (destptr, dest, value));
21700 emit_insn (gen_strset (destptr, dest, value));
21701 emit_insn (gen_strset (destptr, dest, value));
21703 emit_label (label);
21704 LABEL_NUSES (label) = 1;
21708 rtx label = ix86_expand_aligntest (count, 8, true);
21711 dest = change_address (destmem, DImode, destptr);
21712 emit_insn (gen_strset (destptr, dest, value));
21716 dest = change_address (destmem, SImode, destptr);
21717 emit_insn (gen_strset (destptr, dest, value));
21718 emit_insn (gen_strset (destptr, dest, value));
21720 emit_label (label);
21721 LABEL_NUSES (label) = 1;
21725 rtx label = ix86_expand_aligntest (count, 4, true);
21726 dest = change_address (destmem, SImode, destptr);
21727 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21728 emit_label (label);
21729 LABEL_NUSES (label) = 1;
21733 rtx label = ix86_expand_aligntest (count, 2, true);
21734 dest = change_address (destmem, HImode, destptr);
21735 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21736 emit_label (label);
21737 LABEL_NUSES (label) = 1;
21741 rtx label = ix86_expand_aligntest (count, 1, true);
21742 dest = change_address (destmem, QImode, destptr);
21743 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21744 emit_label (label);
21745 LABEL_NUSES (label) = 1;
21749 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
21750 to DESIRED_ALIGNMENT. */
21752 expand_movmem_prologue (rtx destmem, rtx srcmem,
21753 rtx destptr, rtx srcptr, rtx count,
21754 int align, int desired_alignment)
21756 if (align <= 1 && desired_alignment > 1)
21758 rtx label = ix86_expand_aligntest (destptr, 1, false);
21759 srcmem = change_address (srcmem, QImode, srcptr);
21760 destmem = change_address (destmem, QImode, destptr);
21761 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21762 ix86_adjust_counter (count, 1);
21763 emit_label (label);
21764 LABEL_NUSES (label) = 1;
21766 if (align <= 2 && desired_alignment > 2)
21768 rtx label = ix86_expand_aligntest (destptr, 2, false);
21769 srcmem = change_address (srcmem, HImode, srcptr);
21770 destmem = change_address (destmem, HImode, destptr);
21771 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21772 ix86_adjust_counter (count, 2);
21773 emit_label (label);
21774 LABEL_NUSES (label) = 1;
21776 if (align <= 4 && desired_alignment > 4)
21778 rtx label = ix86_expand_aligntest (destptr, 4, false);
21779 srcmem = change_address (srcmem, SImode, srcptr);
21780 destmem = change_address (destmem, SImode, destptr);
21781 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21782 ix86_adjust_counter (count, 4);
21783 emit_label (label);
21784 LABEL_NUSES (label) = 1;
21786 gcc_assert (desired_alignment <= 8);
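/* A sketch of the peeling the prologue above performs, assuming
   pointers fit in unsigned long: one move of each power-of-two size
   whose alignment bit is set in the destination address, after which
   the destination is DESIRED_ALIGN-aligned.  It returns the number of
   bytes peeled, by which ix86_adjust_counter then decreases COUNT.  */
static unsigned long
model_movmem_prologue (unsigned char *dst, const unsigned char *src,
		       int desired_align)
{
  unsigned long done = 0;
  int size, j;
  for (size = 1; size < desired_align; size *= 2)
    if ((unsigned long) (dst + done) & size)
      {
	for (j = 0; j < size; j++)
	  dst[done + j] = src[done + j];
	done += size;
      }
  return done;
}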
21789 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
21790 ALIGN_BYTES is how many bytes need to be copied. */
21792 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
21793 int desired_align, int align_bytes)
21796 rtx orig_dst = dst;
21797 rtx orig_src = src;
21799 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
21800 if (src_align_bytes >= 0)
21801 src_align_bytes = desired_align - src_align_bytes;
21802 if (align_bytes & 1)
21804 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21805 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
21807 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21809 if (align_bytes & 2)
21811 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21812 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
21813 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21814 set_mem_align (dst, 2 * BITS_PER_UNIT);
21815 if (src_align_bytes >= 0
21816 && (src_align_bytes & 1) == (align_bytes & 1)
21817 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
21818 set_mem_align (src, 2 * BITS_PER_UNIT);
21820 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21822 if (align_bytes & 4)
21824 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21825 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
21826 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21827 set_mem_align (dst, 4 * BITS_PER_UNIT);
21828 if (src_align_bytes >= 0)
21830 unsigned int src_align = 0;
21831 if ((src_align_bytes & 3) == (align_bytes & 3))
21833 else if ((src_align_bytes & 1) == (align_bytes & 1))
21835 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21836 set_mem_align (src, src_align * BITS_PER_UNIT);
21839 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21841 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21842 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
21843 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21844 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21845 if (src_align_bytes >= 0)
21847 unsigned int src_align = 0;
21848 if ((src_align_bytes & 7) == (align_bytes & 7))
21850 else if ((src_align_bytes & 3) == (align_bytes & 3))
21852 else if ((src_align_bytes & 1) == (align_bytes & 1))
21854 if (src_align > (unsigned int) desired_align)
21855 src_align = desired_align;
21856 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21857 set_mem_align (src, src_align * BITS_PER_UNIT);
21859 if (MEM_SIZE_KNOWN_P (orig_dst))
21860 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21861 if (MEM_SIZE_KNOWN_P (orig_src))
21862 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
21867 /* Set enough of DEST to align DEST, known to be aligned by ALIGN, to
21868 DESIRED_ALIGNMENT. */
21870 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
21871 int align, int desired_alignment)
21873 if (align <= 1 && desired_alignment > 1)
21875 rtx label = ix86_expand_aligntest (destptr, 1, false);
21876 destmem = change_address (destmem, QImode, destptr);
21877 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
21878 ix86_adjust_counter (count, 1);
21879 emit_label (label);
21880 LABEL_NUSES (label) = 1;
21882 if (align <= 2 && desired_alignment > 2)
21884 rtx label = ix86_expand_aligntest (destptr, 2, false);
21885 destmem = change_address (destmem, HImode, destptr);
21886 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
21887 ix86_adjust_counter (count, 2);
21888 emit_label (label);
21889 LABEL_NUSES (label) = 1;
21891 if (align <= 4 && desired_alignment > 4)
21893 rtx label = ix86_expand_aligntest (destptr, 4, false);
21894 destmem = change_address (destmem, SImode, destptr);
21895 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
21896 ix86_adjust_counter (count, 4);
21897 emit_label (label);
21898 LABEL_NUSES (label) = 1;
21900 gcc_assert (desired_alignment <= 8);
21903 /* Set enough of DST to align DST, known to be aligned by ALIGN, to
21904 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
21906 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
21907 int desired_align, int align_bytes)
21910 rtx orig_dst = dst;
21911 if (align_bytes & 1)
21913 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21915 emit_insn (gen_strset (destreg, dst,
21916 gen_lowpart (QImode, value)));
21918 if (align_bytes & 2)
21920 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21921 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21922 set_mem_align (dst, 2 * BITS_PER_UNIT);
21924 emit_insn (gen_strset (destreg, dst,
21925 gen_lowpart (HImode, value)));
21927 if (align_bytes & 4)
21929 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21930 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21931 set_mem_align (dst, 4 * BITS_PER_UNIT);
21933 emit_insn (gen_strset (destreg, dst,
21934 gen_lowpart (SImode, value)));
21936 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21937 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21938 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21939 if (MEM_SIZE_KNOWN_P (orig_dst))
21940 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21944 /* Given COUNT and EXPECTED_SIZE, decide on the codegen of the string operation. */
21945 static enum stringop_alg
21946 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
21947 int *dynamic_check)
21949 const struct stringop_algs * algs;
21950 bool optimize_for_speed;
21951 /* Algorithms using the rep prefix want at least edi and ecx;
21952 additionally, memset wants eax and memcpy wants esi. Don't
21953 consider such algorithms if the user has appropriated those
21954 registers for their own purposes. */
21955 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
21957 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
21959 #define ALG_USABLE_P(alg) (rep_prefix_usable \
21960 || (alg != rep_prefix_1_byte \
21961 && alg != rep_prefix_4_byte \
21962 && alg != rep_prefix_8_byte))
21963 const struct processor_costs *cost;
21965 /* Even if the string operation call is cold, we still might spend a lot
21966 of time processing large blocks. */
21967 if (optimize_function_for_size_p (cfun)
21968 || (optimize_insn_for_size_p ()
21969 && expected_size != -1 && expected_size < 256))
21970 optimize_for_speed = false;
21972 optimize_for_speed = true;
21974 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
21976 *dynamic_check = -1;
21978 algs = &cost->memset[TARGET_64BIT != 0];
21980 algs = &cost->memcpy[TARGET_64BIT != 0];
21981 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
21982 return ix86_stringop_alg;
21983 /* rep; movq or rep; movl is the smallest variant. */
21984 else if (!optimize_for_speed)
21986 if (!count || (count & 3))
21987 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
21989 return rep_prefix_usable ? rep_prefix_4_byte : loop;
21991 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
21993 else if (expected_size != -1 && expected_size < 4)
21994 return loop_1_byte;
21995 else if (expected_size != -1)
21998 enum stringop_alg alg = libcall;
21999 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
22001 /* We get here if the algorithms that were not libcall-based
22002 were rep-prefix based and we are unable to use rep prefixes
22003 based on global register usage. Break out of the loop and
22004 use the heuristic below. */
22005 if (algs->size[i].max == 0)
22007 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
22009 enum stringop_alg candidate = algs->size[i].alg;
22011 if (candidate != libcall && ALG_USABLE_P (candidate))
22013 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
22014 last non-libcall inline algorithm. */
22015 if (TARGET_INLINE_ALL_STRINGOPS)
22017 /* When the current size is best copied by a libcall,
22018 but we are still forced to inline, run the heuristic below
22019 that will pick code for medium sized blocks. */
22020 if (alg != libcall)
22024 else if (ALG_USABLE_P (candidate))
22028 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
22030 /* When asked to inline the call anyway, try to pick a meaningful choice.
22031 We look for the maximal size of a block that is faster to copy by hand
22032 and take blocks of at most that size, guessing that the average size
22033 will be roughly half of the block.
22035 If this turns out to be bad, we might simply specify the preferred
22036 choice in ix86_costs. */
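/* For example, if the largest hand-copied entry in the size table is
   {8192, rep_prefix_4_byte}, MAX below becomes 8192 and we recurse with
   EXPECTED_SIZE == 4096; with -minline-stringops-dynamically, blocks
   larger than MAX then go to the libcall via DYNAMIC_CHECK.  */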
22037 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22038 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
22041 enum stringop_alg alg;
22043 bool any_alg_usable_p = true;
22045 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
22047 enum stringop_alg candidate = algs->size[i].alg;
22048 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
22050 if (candidate != libcall && candidate
22051 && ALG_USABLE_P (candidate))
22052 max = algs->size[i].max;
22054 /* If there aren't any usable algorithms, then recursing on
22055 smaller sizes isn't going to find anything. Just return the
22056 simple byte-at-a-time copy loop. */
22057 if (!any_alg_usable_p)
22059 /* Pick something reasonable. */
22060 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22061 *dynamic_check = 128;
22062 return loop_1_byte;
22066 alg = decide_alg (count, max / 2, memset, dynamic_check);
22067 gcc_assert (*dynamic_check == -1);
22068 gcc_assert (alg != libcall);
22069 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22070 *dynamic_check = max;
22073 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
22074 #undef ALG_USABLE_P
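/* A minimal host-side model (illustrative only; the struct and function
   names are hypothetical) of the size-table walk in decide_alg above:
   pick the first entry whose MAX covers EXPECTED_SIZE.  For a table
   {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}} and
   EXPECTED_SIZE == 100 this picks rep_prefix_4_byte.  */
#if 0
struct size_entry { int max; int alg; };

static int
pick_alg (const struct size_entry *table, int n, int expected_size)
{
  int i;
  for (i = 0; i < n; i++)
    if (table[i].max >= expected_size || table[i].max == -1)
      return table[i].alg;    /* first entry that covers the size */
  return -1;                  /* fall back to a libcall */
}
#endif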
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN may be based on profile feedback and thus is not 100% guaranteed). */
22080 decide_alignment (int align,
22081 enum stringop_alg alg,
22084 int desired_align = 0;
22088 gcc_unreachable ();
22090 case unrolled_loop:
22091 desired_align = GET_MODE_SIZE (Pmode);
22093 case rep_prefix_8_byte:
22096 case rep_prefix_4_byte:
/* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
   copying a whole cache line at once. */
22099 if (TARGET_PENTIUMPRO)
22104 case rep_prefix_1_byte:
/* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
   copying a whole cache line at once. */
22107 if (TARGET_PENTIUMPRO)
22121 if (desired_align < align)
22122 desired_align = align;
22123 if (expected_size != -1 && expected_size < 4)
22124 desired_align = align;
22125 return desired_align;
22128 /* Return the smallest power of 2 greater than VAL. */
22130 smallest_pow2_greater_than (int val)
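{
  /* A minimal sketch consistent with the comment above: keep doubling
     RET until it exceeds VAL.  */
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}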
22138 /* Expand string move (memcpy) operation. Use i386 string operations
22139 when profitable. expand_setmem contains similar code. The code
22140 depends upon architecture, block size and alignment, but always has
22141 the same overall structure:
1) Prologue guard: Conditional that jumps up to the epilogue for small
   blocks that can be handled by the epilogue alone.  This is faster,
   but also needed for correctness, since the prologue assumes the block
   is larger than the desired alignment.
An optional dynamic check for size, with a libcall for large blocks,
   is emitted here too when -minline-stringops-dynamically is in effect.
2) Prologue: copy the first few bytes in order to get the destination
   aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
   than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
   copied.  We emit either a jump tree on power-of-two-sized
   blocks, or a byte loop.
3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
   with the specified algorithm.
4) Epilogue: code copying the tail of the block that is too small to be
   handled by the main body (or up to the size guarded by the prologue guard). */
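/* For concreteness, a C-level sketch (illustrative only, not the emitted
   RTL) of the shape produced for alg == rep_prefix_4_byte with
   desired_align == 4 and size_needed == 4:  */
#if 0
static void
movmem_shape (unsigned char *dst, const unsigned char *src, unsigned long n)
{
  if (n >= 4)                                  /* 1) prologue guard */
    {
      while (((unsigned long) dst & 3) != 0)   /* 2) alignment prologue */
        {
          *dst++ = *src++;
          n--;
        }
      while (n >= 4)                           /* 3) main body; stands in
                                                  for "rep movsl" */
        {
          *dst++ = *src++;
          *dst++ = *src++;
          *dst++ = *src++;
          *dst++ = *src++;
          n -= 4;
        }
    }
  while (n--)                                  /* 4) epilogue: tail bytes */
    *dst++ = *src++;
}
#endif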
22164 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
22165 rtx expected_align_exp, rtx expected_size_exp)
22171 rtx jump_around_label = NULL;
22172 HOST_WIDE_INT align = 1;
22173 unsigned HOST_WIDE_INT count = 0;
22174 HOST_WIDE_INT expected_size = -1;
22175 int size_needed = 0, epilogue_size_needed;
22176 int desired_align = 0, align_bytes = 0;
22177 enum stringop_alg alg;
22179 bool need_zero_guard = false;
22181 if (CONST_INT_P (align_exp))
22182 align = INTVAL (align_exp);
/* i386 can do misaligned access at reasonably increased cost. */
22184 if (CONST_INT_P (expected_align_exp)
22185 && INTVAL (expected_align_exp) > align)
22186 align = INTVAL (expected_align_exp);
22187 /* ALIGN is the minimum of destination and source alignment, but we care here
22188 just about destination alignment. */
22189 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
22190 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
22192 if (CONST_INT_P (count_exp))
22193 count = expected_size = INTVAL (count_exp);
22194 if (CONST_INT_P (expected_size_exp) && count == 0)
22195 expected_size = INTVAL (expected_size_exp);
22197 /* Make sure we don't need to care about overflow later on. */
22198 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22201 /* Step 0: Decide on preferred algorithm, desired alignment and
22202 size of chunks to be copied by main loop. */
22204 alg = decide_alg (count, expected_size, false, &dynamic_check);
22205 desired_align = decide_alignment (align, alg, expected_size);
22207 if (!TARGET_ALIGN_STRINGOPS)
22208 align = desired_align;
22210 if (alg == libcall)
22212 gcc_assert (alg != no_stringop);
22214 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
22215 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
22216 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
22221 gcc_unreachable ();
22223 need_zero_guard = true;
22224 size_needed = GET_MODE_SIZE (Pmode);
22226 case unrolled_loop:
22227 need_zero_guard = true;
22228 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
22230 case rep_prefix_8_byte:
22233 case rep_prefix_4_byte:
22236 case rep_prefix_1_byte:
22240 need_zero_guard = true;
22245 epilogue_size_needed = size_needed;
22247 /* Step 1: Prologue guard. */
/* Alignment code needs count to be in a register. */
22250 if (CONST_INT_P (count_exp) && desired_align > align)
22252 if (INTVAL (count_exp) > desired_align
22253 && INTVAL (count_exp) > size_needed)
22256 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22257 if (align_bytes <= 0)
22260 align_bytes = desired_align - align_bytes;
22262 if (align_bytes == 0)
22263 count_exp = force_reg (counter_mode (count_exp), count_exp);
22265 gcc_assert (desired_align >= 1 && align >= 1);
/* Ensure that the alignment prologue won't copy past the end of the block. */
22268 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22270 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
/* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
   Make sure it is a power of 2. */
22273 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
22277 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* If the main algorithm works on QImode, no epilogue is needed.
   For small sizes just don't align anything. */
22281 if (size_needed == 1)
22282 desired_align = align;
22289 label = gen_label_rtx ();
22290 emit_cmp_and_jump_insns (count_exp,
22291 GEN_INT (epilogue_size_needed),
22292 LTU, 0, counter_mode (count_exp), 1, label);
22293 if (expected_size == -1 || expected_size < epilogue_size_needed)
22294 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22296 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Emit code to decide at runtime whether a library call or inline code should be
22302 if (dynamic_check != -1)
22304 if (CONST_INT_P (count_exp))
22306 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
22308 emit_block_move_via_libcall (dst, src, count_exp, false);
22309 count_exp = const0_rtx;
22315 rtx hot_label = gen_label_rtx ();
22316 jump_around_label = gen_label_rtx ();
22317 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22318 LEU, 0, GET_MODE (count_exp), 1, hot_label);
22319 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22320 emit_block_move_via_libcall (dst, src, count_exp, false);
22321 emit_jump (jump_around_label);
22322 emit_label (hot_label);
22326 /* Step 2: Alignment prologue. */
22328 if (desired_align > align)
22330 if (align_bytes == 0)
/* Except for the first move in the epilogue, we no longer know
   the constant offset in aliasing info.  It doesn't seem worth
   the pain to maintain it for the first move, so throw away
22336 src = change_address (src, BLKmode, srcreg);
22337 dst = change_address (dst, BLKmode, destreg);
22338 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
22343 /* If we know how many bytes need to be stored before dst is
22344 sufficiently aligned, maintain aliasing info accurately. */
22345 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
22346 desired_align, align_bytes);
22347 count_exp = plus_constant (count_exp, -align_bytes);
22348 count -= align_bytes;
22350 if (need_zero_guard
22351 && (count < (unsigned HOST_WIDE_INT) size_needed
22352 || (align_bytes == 0
22353 && count < ((unsigned HOST_WIDE_INT) size_needed
22354 + desired_align - align))))
22356 /* It is possible that we copied enough so the main loop will not
22358 gcc_assert (size_needed > 1);
22359 if (label == NULL_RTX)
22360 label = gen_label_rtx ();
22361 emit_cmp_and_jump_insns (count_exp,
22362 GEN_INT (size_needed),
22363 LTU, 0, counter_mode (count_exp), 1, label);
22364 if (expected_size == -1
22365 || expected_size < (desired_align - align) / 2 + size_needed)
22366 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22368 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22371 if (label && size_needed == 1)
22373 emit_label (label);
22374 LABEL_NUSES (label) = 1;
22376 epilogue_size_needed = 1;
22378 else if (label == NULL_RTX)
22379 epilogue_size_needed = size_needed;
22381 /* Step 3: Main loop. */
22387 gcc_unreachable ();
22389 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22390 count_exp, QImode, 1, expected_size);
22393 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22394 count_exp, Pmode, 1, expected_size);
22396 case unrolled_loop:
/* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
   registers for 4 temporaries anyway. */
22399 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22400 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
22403 case rep_prefix_8_byte:
22404 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22407 case rep_prefix_4_byte:
22408 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22411 case rep_prefix_1_byte:
22412 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
/* Properly adjust the offsets of src and dest memory for aliasing. */
22417 if (CONST_INT_P (count_exp))
22419 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
22420 (count / size_needed) * size_needed);
22421 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22422 (count / size_needed) * size_needed);
22426 src = change_address (src, BLKmode, srcreg);
22427 dst = change_address (dst, BLKmode, destreg);
22430 /* Step 4: Epilogue to copy the remaining bytes. */
/* When the main loop is done, COUNT_EXP might still hold the original count,
   while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   The epilogue will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes (e.g. with COUNT 1000 and SIZE_NEEDED 16, 1000 & 15 == 8 bytes
   remain).  Compensate if needed. */
22439 if (size_needed < epilogue_size_needed)
22442 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22443 GEN_INT (size_needed - 1), count_exp, 1,
22445 if (tmp != count_exp)
22446 emit_move_insn (count_exp, tmp);
22448 emit_label (label);
22449 LABEL_NUSES (label) = 1;
22452 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22453 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
22454 epilogue_size_needed);
22455 if (jump_around_label)
22456 emit_label (jump_around_label);
/* Helper function for memset.  For the QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a multiplication by 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
22466 promote_duplicated_reg (enum machine_mode mode, rtx val)
22468 enum machine_mode valmode = GET_MODE (val);
22470 int nops = mode == DImode ? 3 : 2;
22472 gcc_assert (mode == SImode || mode == DImode);
22473 if (val == const0_rtx)
22474 return copy_to_mode_reg (mode, const0_rtx);
22475 if (CONST_INT_P (val))
22477 HOST_WIDE_INT v = INTVAL (val) & 255;
22481 if (mode == DImode)
22482 v |= (v << 16) << 16;
22483 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
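      /* Worked example: VAL == 0xAB gives v == 0xABAB after the first
         shift/or, 0xABABABAB after the second, and 0xABABABABABABABAB
         for DImode.  A host-side model (illustrative only):

           unsigned long long v = 0xab;
           v |= v << 8;              // 0xabab
           v |= v << 16;             // 0xabababab
           v |= (v << 16) << 16;     // 0xabababababababab; the split
                                     // shift avoids shifting a 32-bit
                                     // value by 32.  */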
22486 if (valmode == VOIDmode)
22488 if (valmode != QImode)
22489 val = gen_lowpart (QImode, val);
22490 if (mode == QImode)
22492 if (!TARGET_PARTIAL_REG_STALL)
22494 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
22495 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
22496 <= (ix86_cost->shift_const + ix86_cost->add) * nops
22497 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
22499 rtx reg = convert_modes (mode, QImode, val, true);
22500 tmp = promote_duplicated_reg (mode, const1_rtx);
22501 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
22506 rtx reg = convert_modes (mode, QImode, val, true);
22508 if (!TARGET_PARTIAL_REG_STALL)
22509 if (mode == SImode)
22510 emit_insn (gen_movsi_insv_1 (reg, reg));
22512 emit_insn (gen_movdi_insv_1 (reg, reg));
22515 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
22516 NULL, 1, OPTAB_DIRECT);
22518 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
22520 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
22521 NULL, 1, OPTAB_DIRECT);
22522 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
22523 if (mode == SImode)
22525 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
22526 NULL, 1, OPTAB_DIRECT);
22527 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Duplicate value VAL using promote_duplicated_reg into the maximal size that
   will be needed by the main loop copying SIZE_NEEDED chunks and by the
   prologue raising alignment from ALIGN to DESIRED_ALIGN. */
22536 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
22541 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
22542 promoted_val = promote_duplicated_reg (DImode, val);
22543 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
22544 promoted_val = promote_duplicated_reg (SImode, val);
22545 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
22546 promoted_val = promote_duplicated_reg (HImode, val);
22548 promoted_val = val;
22550 return promoted_val;
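  /* E.g. on 64-bit targets, SIZE_NEEDED == 16 (an unrolled loop moving
     Pmode chunks) promotes VAL to DImode, while SIZE_NEEDED == 4
     promotes it only to SImode.  */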
/* Expand a string set operation (memset/bzero).  Use i386 string operations
   when profitable.  See the expand_movmem comment for an explanation of the
   individual steps performed. */
22557 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
22558 rtx expected_align_exp, rtx expected_size_exp)
22563 rtx jump_around_label = NULL;
22564 HOST_WIDE_INT align = 1;
22565 unsigned HOST_WIDE_INT count = 0;
22566 HOST_WIDE_INT expected_size = -1;
22567 int size_needed = 0, epilogue_size_needed;
22568 int desired_align = 0, align_bytes = 0;
22569 enum stringop_alg alg;
22570 rtx promoted_val = NULL;
22571 bool force_loopy_epilogue = false;
22573 bool need_zero_guard = false;
22575 if (CONST_INT_P (align_exp))
22576 align = INTVAL (align_exp);
/* i386 can do misaligned access at reasonably increased cost. */
22578 if (CONST_INT_P (expected_align_exp)
22579 && INTVAL (expected_align_exp) > align)
22580 align = INTVAL (expected_align_exp);
22581 if (CONST_INT_P (count_exp))
22582 count = expected_size = INTVAL (count_exp);
22583 if (CONST_INT_P (expected_size_exp) && count == 0)
22584 expected_size = INTVAL (expected_size_exp);
22586 /* Make sure we don't need to care about overflow later on. */
22587 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22590 /* Step 0: Decide on preferred algorithm, desired alignment and
22591 size of chunks to be copied by main loop. */
22593 alg = decide_alg (count, expected_size, true, &dynamic_check);
22594 desired_align = decide_alignment (align, alg, expected_size);
22596 if (!TARGET_ALIGN_STRINGOPS)
22597 align = desired_align;
22599 if (alg == libcall)
22601 gcc_assert (alg != no_stringop);
22603 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
22604 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
22609 gcc_unreachable ();
22611 need_zero_guard = true;
22612 size_needed = GET_MODE_SIZE (Pmode);
22614 case unrolled_loop:
22615 need_zero_guard = true;
22616 size_needed = GET_MODE_SIZE (Pmode) * 4;
22618 case rep_prefix_8_byte:
22621 case rep_prefix_4_byte:
22624 case rep_prefix_1_byte:
22628 need_zero_guard = true;
22632 epilogue_size_needed = size_needed;
22634 /* Step 1: Prologue guard. */
/* Alignment code needs count to be in a register. */
22637 if (CONST_INT_P (count_exp) && desired_align > align)
22639 if (INTVAL (count_exp) > desired_align
22640 && INTVAL (count_exp) > size_needed)
22643 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22644 if (align_bytes <= 0)
22647 align_bytes = desired_align - align_bytes;
22649 if (align_bytes == 0)
22651 enum machine_mode mode = SImode;
22652 if (TARGET_64BIT && (count & ~0xffffffff))
22654 count_exp = force_reg (mode, count_exp);
/* Do the cheap promotion to allow better CSE across the
   main loop and the epilogue (i.e. one load of the big constant in
   front of all the code). */
22660 if (CONST_INT_P (val_exp))
22661 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22662 desired_align, align);
/* Ensure that the alignment prologue won't copy past the end of the block. */
22664 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22666 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
22667 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
Make sure it is a power of 2. */
22669 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
/* To improve performance of small blocks, we jump around the VAL-promoting
   code.  This means that if the promoted VAL is not constant,
   we might not use it in the epilogue and have to use a byte
22675 if (epilogue_size_needed > 2 && !promoted_val)
22676 force_loopy_epilogue = true;
22679 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* If the main algorithm works on QImode, no epilogue is needed.
   For small sizes just don't align anything. */
22683 if (size_needed == 1)
22684 desired_align = align;
22691 label = gen_label_rtx ();
22692 emit_cmp_and_jump_insns (count_exp,
22693 GEN_INT (epilogue_size_needed),
22694 LTU, 0, counter_mode (count_exp), 1, label);
22695 if (expected_size == -1 || expected_size <= epilogue_size_needed)
22696 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22698 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22701 if (dynamic_check != -1)
22703 rtx hot_label = gen_label_rtx ();
22704 jump_around_label = gen_label_rtx ();
22705 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22706 LEU, 0, counter_mode (count_exp), 1, hot_label);
22707 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22708 set_storage_via_libcall (dst, count_exp, val_exp, false);
22709 emit_jump (jump_around_label);
22710 emit_label (hot_label);
22713 /* Step 2: Alignment prologue. */
/* Do the expensive promotion once we have branched off the small blocks. */
22717 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22718 desired_align, align);
22719 gcc_assert (desired_align >= 1 && align >= 1);
22721 if (desired_align > align)
22723 if (align_bytes == 0)
/* Except for the first move in the epilogue, we no longer know
   the constant offset in aliasing info.  It doesn't seem worth
   the pain to maintain it for the first move, so throw away
22729 dst = change_address (dst, BLKmode, destreg);
22730 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
22735 /* If we know how many bytes need to be stored before dst is
22736 sufficiently aligned, maintain aliasing info accurately. */
22737 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
22738 desired_align, align_bytes);
22739 count_exp = plus_constant (count_exp, -align_bytes);
22740 count -= align_bytes;
22742 if (need_zero_guard
22743 && (count < (unsigned HOST_WIDE_INT) size_needed
22744 || (align_bytes == 0
22745 && count < ((unsigned HOST_WIDE_INT) size_needed
22746 + desired_align - align))))
22748 /* It is possible that we copied enough so the main loop will not
22750 gcc_assert (size_needed > 1);
22751 if (label == NULL_RTX)
22752 label = gen_label_rtx ();
22753 emit_cmp_and_jump_insns (count_exp,
22754 GEN_INT (size_needed),
22755 LTU, 0, counter_mode (count_exp), 1, label);
22756 if (expected_size == -1
22757 || expected_size < (desired_align - align) / 2 + size_needed)
22758 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22760 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22763 if (label && size_needed == 1)
22765 emit_label (label);
22766 LABEL_NUSES (label) = 1;
22768 promoted_val = val_exp;
22769 epilogue_size_needed = 1;
22771 else if (label == NULL_RTX)
22772 epilogue_size_needed = size_needed;
22774 /* Step 3: Main loop. */
22780 gcc_unreachable ();
22782 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22783 count_exp, QImode, 1, expected_size);
22786 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22787 count_exp, Pmode, 1, expected_size);
22789 case unrolled_loop:
22790 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22791 count_exp, Pmode, 4, expected_size);
22793 case rep_prefix_8_byte:
22794 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22797 case rep_prefix_4_byte:
22798 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22801 case rep_prefix_1_byte:
22802 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
/* Properly adjust the offset of dest memory for aliasing. */
22807 if (CONST_INT_P (count_exp))
22808 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22809 (count / size_needed) * size_needed);
22811 dst = change_address (dst, BLKmode, destreg);
22813 /* Step 4: Epilogue to copy the remaining bytes. */
/* When the main loop is done, COUNT_EXP might still hold the original count,
   while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   The epilogue will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed. */
22822 if (size_needed < epilogue_size_needed)
22825 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22826 GEN_INT (size_needed - 1), count_exp, 1,
22828 if (tmp != count_exp)
22829 emit_move_insn (count_exp, tmp);
22831 emit_label (label);
22832 LABEL_NUSES (label) = 1;
22835 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22837 if (force_loopy_epilogue)
22838 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
22839 epilogue_size_needed);
22841 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
22842 epilogue_size_needed);
22844 if (jump_around_label)
22845 emit_label (jump_around_label);
22849 /* Expand the appropriate insns for doing strlen if not just doing
22852 out = result, initialized with the start address
22853 align_rtx = alignment of the address.
scratch = scratch register, initialized with the start address when
22855 not aligned, otherwise undefined
22857 This is just the body. It needs the initializations mentioned above and
22858 some address computing at the end. These things are done in i386.md. */
22861 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
22865 rtx align_2_label = NULL_RTX;
22866 rtx align_3_label = NULL_RTX;
22867 rtx align_4_label = gen_label_rtx ();
22868 rtx end_0_label = gen_label_rtx ();
22870 rtx tmpreg = gen_reg_rtx (SImode);
22871 rtx scratch = gen_reg_rtx (SImode);
22875 if (CONST_INT_P (align_rtx))
22876 align = INTVAL (align_rtx);
22878 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
22880 /* Is there a known alignment and is it less than 4? */
22883 rtx scratch1 = gen_reg_rtx (Pmode);
22884 emit_move_insn (scratch1, out);
22885 /* Is there a known alignment and is it not 2? */
22888 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
22889 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
22891 /* Leave just the 3 lower bits. */
22892 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
22893 NULL_RTX, 0, OPTAB_WIDEN);
22895 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22896 Pmode, 1, align_4_label);
22897 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
22898 Pmode, 1, align_2_label);
22899 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
22900 Pmode, 1, align_3_label);
/* Since the alignment is 2, we have to check 2 or 0 bytes;
   check whether it is aligned to a 4-byte boundary.
22907 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
22908 NULL_RTX, 0, OPTAB_WIDEN);
22910 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22911 Pmode, 1, align_4_label);
22914 mem = change_address (src, QImode, out);
22916 /* Now compare the bytes. */
/* Compare the first n unaligned bytes on a byte-by-byte basis. */
22919 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
22920 QImode, 1, end_0_label);
22922 /* Increment the address. */
22923 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22925 /* Not needed with an alignment of 2 */
22928 emit_label (align_2_label);
22930 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22933 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22935 emit_label (align_3_label);
22938 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22941 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
/* Generate a loop to check 4 bytes at a time.  It is not a good idea to
   align this loop; it only produces huge programs and does not help to
22947 emit_label (align_4_label);
22949 mem = change_address (src, SImode, out);
22950 emit_move_insn (scratch, mem);
22951 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
   This saves three branches inside the loop and many cycles.
22956 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
22957 emit_insn (gen_one_cmplsi2 (scratch, scratch));
22958 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
22959 emit_insn (gen_andsi3 (tmpreg, tmpreg,
22960 gen_int_mode (0x80808080, SImode)));
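  /* Worked example: SCRATCH == 0x63626100 (low byte zero) gives
     (0x63626100 - 0x01010101) & ~0x63626100 & 0x80808080 == 0x00000080,
     while SCRATCH == 0x61626364 (no zero byte) yields 0.  A host-side
     model of the test (illustrative only):

       int has_zero_byte = ((v - 0x01010101u) & ~v & 0x80808080u) != 0;  */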
22961 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
22966 rtx reg = gen_reg_rtx (SImode);
22967 rtx reg2 = gen_reg_rtx (Pmode);
22968 emit_move_insn (reg, tmpreg);
22969 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
22971 /* If zero is not in the first two bytes, move two bytes forward. */
22972 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22973 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22974 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22975 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
22976 gen_rtx_IF_THEN_ELSE (SImode, tmp,
/* Emit the lea manually to avoid clobbering the flags. */
22980 emit_insn (gen_rtx_SET (SImode, reg2,
22981 gen_rtx_PLUS (Pmode, out, const2_rtx)));
22983 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22984 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22985 emit_insn (gen_rtx_SET (VOIDmode, out,
22986 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
22992 rtx end_2_label = gen_label_rtx ();
22993 /* Is zero in the first two bytes? */
22995 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22996 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22997 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
22998 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22999 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
23001 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
23002 JUMP_LABEL (tmp) = end_2_label;
23004 /* Not in the first two. Move two bytes forward. */
23005 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
23006 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
23008 emit_label (end_2_label);
/* Avoid a branch in fixing the byte. */
23013 tmpreg = gen_lowpart (QImode, tmpreg);
23014 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
23015 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
23016 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
23017 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
23019 emit_label (end_0_label);
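/* A C-level model (illustrative only, not the emitted RTL) of the
   sequence expanded above: align the pointer one byte at a time, then
   scan a word at a time with the zero-byte trick, and finally locate
   the exact zero byte.  */
#if 0
static const unsigned char *
strlensi_unroll_model (const unsigned char *p)
{
  unsigned int v;

  while (((unsigned long) p & 3) != 0)      /* 1..3 byte alignment loop */
    {
      if (*p == 0)
        return p;
      p++;
    }
  do                                        /* 4 bytes at a time */
    {
      __builtin_memcpy (&v, p, 4);
      p += 4;
    }
  while (((v - 0x01010101u) & ~v & 0x80808080u) == 0);
  p -= 4;                                   /* word containing the 0 */
  while (*p != 0)                           /* at most 3 iterations */
    p++;
  return p;                                 /* address of the terminator */
}
#endif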
23022 /* Expand strlen. */
23025 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
23027 rtx addr, scratch1, scratch2, scratch3, scratch4;
/* The generic case of the strlen expander is long.  Avoid expanding it
   unless TARGET_INLINE_ALL_STRINGOPS. */
23032 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
23033 && !TARGET_INLINE_ALL_STRINGOPS
23034 && !optimize_insn_for_size_p ()
23035 && (!CONST_INT_P (align) || INTVAL (align) < 4))
23038 addr = force_reg (Pmode, XEXP (src, 0));
23039 scratch1 = gen_reg_rtx (Pmode);
23041 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
23042 && !optimize_insn_for_size_p ())
/* It seems that some optimizers do not combine a call like
     foo (strlen (bar), strlen (bar));
   when the move and the subtraction are done here.  The length is
   calculated just once when these instructions are done inside
   output_strlen_unroll().  But since &bar[strlen(bar)] is often used,
   and this uses one fewer register for the lifetime of
   output_strlen_unroll(), this is better. */
23052 emit_move_insn (out, addr);
23054 ix86_expand_strlensi_unroll_1 (out, src, align);
23056 /* strlensi_unroll_1 returns the address of the zero at the end of
23057 the string, like memchr(), so compute the length by subtracting
23058 the start address. */
23059 emit_insn (ix86_gen_sub3 (out, out, addr));
23065 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23066 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
23069 scratch2 = gen_reg_rtx (Pmode);
23070 scratch3 = gen_reg_rtx (Pmode);
23071 scratch4 = force_reg (Pmode, constm1_rtx);
23073 emit_move_insn (scratch3, addr);
23074 eoschar = force_reg (QImode, eoschar);
23076 src = replace_equiv_address_nv (src, scratch3);
23078 /* If .md starts supporting :P, this can be done in .md. */
23079 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
23080 scratch4), UNSPEC_SCAS);
23081 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
23082 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
23083 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
/* For a given symbol (function), construct code to compute the address of its
   PLT entry in the large x86-64 PIC model. */
23091 construct_plt_address (rtx symbol)
23093 rtx tmp = gen_reg_rtx (Pmode);
23094 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
23096 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
23097 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
23099 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
23100 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
23105 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
23107 rtx pop, bool sibcall)
23109 /* We need to represent that SI and DI registers are clobbered
23111 static int clobbered_registers[] = {
23112 XMM6_REG, XMM7_REG, XMM8_REG,
23113 XMM9_REG, XMM10_REG, XMM11_REG,
23114 XMM12_REG, XMM13_REG, XMM14_REG,
23115 XMM15_REG, SI_REG, DI_REG
23117 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
23118 rtx use = NULL, call;
23119 unsigned int vec_len;
23121 if (pop == const0_rtx)
23123 gcc_assert (!TARGET_64BIT || !pop);
23125 if (TARGET_MACHO && !TARGET_64BIT)
23128 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
23129 fnaddr = machopic_indirect_call_target (fnaddr);
23134 /* Static functions and indirect calls don't need the pic register. */
23135 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
23136 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23137 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
23138 use_reg (&use, pic_offset_table_rtx);
23141 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
23143 rtx al = gen_rtx_REG (QImode, AX_REG);
23144 emit_move_insn (al, callarg2);
23145 use_reg (&use, al);
23148 if (ix86_cmodel == CM_LARGE_PIC
23150 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23151 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
23152 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
23154 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
23155 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
23157 fnaddr = XEXP (fnaddr, 0);
23158 if (GET_MODE (fnaddr) != Pmode)
23159 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
23160 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
23164 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
23166 call = gen_rtx_SET (VOIDmode, retval, call);
23167 vec[vec_len++] = call;
23171 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
23172 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
23173 vec[vec_len++] = pop;
23176 if (TARGET_64BIT_MS_ABI
23177 && (!callarg2 || INTVAL (callarg2) != -2))
23181 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
23182 UNSPEC_MS_TO_SYSV_CALL);
23184 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
23186 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
23188 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
23190 clobbered_registers[i]));
23193 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
23194 if (TARGET_VZEROUPPER)
23197 if (cfun->machine->callee_pass_avx256_p)
23199 if (cfun->machine->callee_return_avx256_p)
23200 avx256 = callee_return_pass_avx256;
23202 avx256 = callee_pass_avx256;
23204 else if (cfun->machine->callee_return_avx256_p)
23205 avx256 = callee_return_avx256;
23207 avx256 = call_no_avx256;
23209 if (reload_completed)
23210 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
23212 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
23213 gen_rtvec (1, GEN_INT (avx256)),
23214 UNSPEC_CALL_NEEDS_VZEROUPPER);
23218 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
23219 call = emit_call_insn (call);
23221 CALL_INSN_FUNCTION_USAGE (call) = use;
23227 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
23229 rtx pat = PATTERN (insn);
23230 rtvec vec = XVEC (pat, 0);
23231 int len = GET_NUM_ELEM (vec) - 1;
23233 /* Strip off the last entry of the parallel. */
23234 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
23235 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
23237 pat = RTVEC_ELT (vec, 0);
23239 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
23241 emit_insn (gen_avx_vzeroupper (vzeroupper));
23242 emit_call_insn (pat);
23245 /* Output the assembly for a call instruction. */
23248 ix86_output_call_insn (rtx insn, rtx call_op)
23250 bool direct_p = constant_call_address_operand (call_op, Pmode);
23251 bool seh_nop_p = false;
23254 if (SIBLING_CALL_P (insn))
23258 /* SEH epilogue detection requires the indirect branch case
23259 to include REX.W. */
23260 else if (TARGET_SEH)
23261 xasm = "rex.W jmp %A0";
23265 output_asm_insn (xasm, &call_op);
23269 /* SEH unwinding can require an extra nop to be emitted in several
23270 circumstances. Determine if we have one of those. */
23275 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
23277 /* If we get to another real insn, we don't need the nop. */
/* If we get to the epilogue note, prevent a catch region from
   being adjacent to the standard epilogue sequence.  With non-call
   exceptions, we'll have done this during epilogue emission. */
23284 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
23285 && !flag_non_call_exceptions
23286 && !can_throw_internal (insn))
23293 /* If we didn't find a real insn following the call, prevent the
23294 unwinder from looking into the next function. */
23300 xasm = "call\t%P0";
23302 xasm = "call\t%A0";
23304 output_asm_insn (xasm, &call_op);
23312 /* Clear stack slot assignments remembered from previous functions.
23313 This is called from INIT_EXPANDERS once before RTL is emitted for each
23316 static struct machine_function *
23317 ix86_init_machine_status (void)
23319 struct machine_function *f;
23321 f = ggc_alloc_cleared_machine_function ();
23322 f->use_fast_prologue_epilogue_nregs = -1;
23323 f->tls_descriptor_call_expanded_p = 0;
23324 f->call_abi = ix86_abi;
23329 /* Return a MEM corresponding to a stack slot with mode MODE.
23330 Allocate a new slot if necessary.
23332 The RTL for a function can have several slots available: N is
23333 which slot to use. */
23336 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
23338 struct stack_local_entry *s;
23340 gcc_assert (n < MAX_386_STACK_LOCALS);
23342 /* Virtual slot is valid only before vregs are instantiated. */
23343 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
23345 for (s = ix86_stack_locals; s; s = s->next)
23346 if (s->mode == mode && s->n == n)
23347 return validize_mem (copy_rtx (s->rtl));
23349 s = ggc_alloc_stack_local_entry ();
23352 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
23354 s->next = ix86_stack_locals;
23355 ix86_stack_locals = s;
23356 return validize_mem (s->rtl);
23359 /* Calculate the length of the memory address in the instruction encoding.
23360 Includes addr32 prefix, does not include the one-byte modrm, opcode,
23361 or other prefixes. */
23364 memory_address_length (rtx addr)
23366 struct ix86_address parts;
23367 rtx base, index, disp;
23371 if (GET_CODE (addr) == PRE_DEC
23372 || GET_CODE (addr) == POST_INC
23373 || GET_CODE (addr) == PRE_MODIFY
23374 || GET_CODE (addr) == POST_MODIFY)
23377 ok = ix86_decompose_address (addr, &parts);
23380 if (parts.base && GET_CODE (parts.base) == SUBREG)
23381 parts.base = SUBREG_REG (parts.base);
23382 if (parts.index && GET_CODE (parts.index) == SUBREG)
23383 parts.index = SUBREG_REG (parts.index);
23386 index = parts.index;
23389 /* Add length of addr32 prefix. */
23390 len = (GET_CODE (addr) == ZERO_EXTEND
23391 || GET_CODE (addr) == AND);
23394 - esp as the base always wants an index,
23395 - ebp as the base always wants a displacement,
23396 - r12 as the base always wants an index,
23397 - r13 as the base always wants a displacement. */
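  /* Worked examples: "(%eax)" needs neither SIB nor displacement,
     length 0; "(%esp)" needs a SIB byte, length 1; "(%ebp)" needs a
     disp8, length 1; "4(%eax,%ebx,2)" needs a SIB byte plus a disp8,
     length 2; a bare 32-bit absolute address needs a disp32, length 4.  */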
23399 /* Register Indirect. */
23400 if (base && !index && !disp)
23402 /* esp (for its index) and ebp (for its displacement) need
23403 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23406 && (addr == arg_pointer_rtx
23407 || addr == frame_pointer_rtx
23408 || REGNO (addr) == SP_REG
23409 || REGNO (addr) == BP_REG
23410 || REGNO (addr) == R12_REG
23411 || REGNO (addr) == R13_REG))
/* Direct Addressing.  In 64-bit mode mod 00 r/m 5
   is not disp32, but disp32(%rip), so for disp32 a
   SIB byte is needed, unless print_operand_address
   optimizes it into disp32(%rip) or (%rip) is implied
23420 else if (disp && !base && !index)
23427 if (GET_CODE (disp) == CONST)
23428 symbol = XEXP (disp, 0);
23429 if (GET_CODE (symbol) == PLUS
23430 && CONST_INT_P (XEXP (symbol, 1)))
23431 symbol = XEXP (symbol, 0);
23433 if (GET_CODE (symbol) != LABEL_REF
23434 && (GET_CODE (symbol) != SYMBOL_REF
23435 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
23436 && (GET_CODE (symbol) != UNSPEC
23437 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
23438 && XINT (symbol, 1) != UNSPEC_PCREL
23439 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
23446 /* Find the length of the displacement constant. */
23449 if (base && satisfies_constraint_K (disp))
23454 /* ebp always wants a displacement. Similarly r13. */
23455 else if (base && REG_P (base)
23456 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
23459 /* An index requires the two-byte modrm form.... */
23461 /* ...like esp (or r12), which always wants an index. */
23462 || base == arg_pointer_rtx
23463 || base == frame_pointer_rtx
23464 || (base && REG_P (base)
23465 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
23485 ix86_attr_length_immediate_default (rtx insn, bool shortform)
23489 extract_insn_cached (insn);
23490 for (i = recog_data.n_operands - 1; i >= 0; --i)
23491 if (CONSTANT_P (recog_data.operand[i]))
23493 enum attr_mode mode = get_attr_mode (insn);
23496 if (shortform && CONST_INT_P (recog_data.operand[i]))
23498 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
23505 ival = trunc_int_for_mode (ival, HImode);
23508 ival = trunc_int_for_mode (ival, SImode);
23513 if (IN_RANGE (ival, -128, 127))
/* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
23535 fatal_insn ("unknown insn mode", insn);
/* Compute the default value for the "length_address" attribute. */
23542 ix86_attr_length_address_default (rtx insn)
23546 if (get_attr_type (insn) == TYPE_LEA)
23548 rtx set = PATTERN (insn), addr;
23550 if (GET_CODE (set) == PARALLEL)
23551 set = XVECEXP (set, 0, 0);
23553 gcc_assert (GET_CODE (set) == SET);
23555 addr = SET_SRC (set);
23556 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
23558 if (GET_CODE (addr) == ZERO_EXTEND)
23559 addr = XEXP (addr, 0);
23560 if (GET_CODE (addr) == SUBREG)
23561 addr = SUBREG_REG (addr);
23564 return memory_address_length (addr);
23567 extract_insn_cached (insn);
23568 for (i = recog_data.n_operands - 1; i >= 0; --i)
23569 if (MEM_P (recog_data.operand[i]))
23571 constrain_operands_cached (reload_completed);
23572 if (which_alternative != -1)
23574 const char *constraints = recog_data.constraints[i];
23575 int alt = which_alternative;
23577 while (*constraints == '=' || *constraints == '+')
23580 while (*constraints++ != ',')
23582 /* Skip ignored operands. */
23583 if (*constraints == 'X')
23586 return memory_address_length (XEXP (recog_data.operand[i], 0));
/* Compute the default value for the "length_vex" attribute.  It includes
   the 2- or 3-byte VEX prefix and 1 opcode byte. */
23595 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
/* Only the 0f opcode can use the 2-byte VEX prefix; the VEX.W bit
   requires the 3-byte VEX prefix. */
23601 if (!has_0f_opcode || has_vex_w)
/* We can always use the 2-byte VEX prefix in 32-bit mode. */
23608 extract_insn_cached (insn);
23610 for (i = recog_data.n_operands - 1; i >= 0; --i)
23611 if (REG_P (recog_data.operand[i]))
/* The REX.W bit requires the 3-byte VEX prefix. */
23614 if (GET_MODE (recog_data.operand[i]) == DImode
23615 && GENERAL_REG_P (recog_data.operand[i]))
/* The REX.X and REX.B bits require the 3-byte VEX prefix. */
23621 if (MEM_P (recog_data.operand[i])
23622 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
/* Return the maximum number of instructions a CPU can issue. */
23632 ix86_issue_rate (void)
23636 case PROCESSOR_PENTIUM:
23637 case PROCESSOR_ATOM:
23641 case PROCESSOR_PENTIUMPRO:
23642 case PROCESSOR_PENTIUM4:
23643 case PROCESSOR_CORE2_32:
23644 case PROCESSOR_CORE2_64:
23645 case PROCESSOR_COREI7_32:
23646 case PROCESSOR_COREI7_64:
23647 case PROCESSOR_ATHLON:
23649 case PROCESSOR_AMDFAM10:
23650 case PROCESSOR_NOCONA:
23651 case PROCESSOR_GENERIC32:
23652 case PROCESSOR_GENERIC64:
23653 case PROCESSOR_BDVER1:
23654 case PROCESSOR_BDVER2:
23655 case PROCESSOR_BTVER1:
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
   set by DEP_INSN and nothing else set by DEP_INSN. */
23667 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
23671 /* Simplify the test for uninteresting insns. */
23672 if (insn_type != TYPE_SETCC
23673 && insn_type != TYPE_ICMOV
23674 && insn_type != TYPE_FCMOV
23675 && insn_type != TYPE_IBR)
23678 if ((set = single_set (dep_insn)) != 0)
23680 set = SET_DEST (set);
23683 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
23684 && XVECLEN (PATTERN (dep_insn), 0) == 2
23685 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
23686 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
23688 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
23694 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
23697 /* This test is true if the dependent insn reads the flags but
23698 not any other potentially set register. */
23699 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
23702 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
23708 /* Return true iff USE_INSN has a memory address with operands set by
23712 ix86_agi_dependent (rtx set_insn, rtx use_insn)
23715 extract_insn_cached (use_insn);
23716 for (i = recog_data.n_operands - 1; i >= 0; --i)
23717 if (MEM_P (recog_data.operand[i]))
23719 rtx addr = XEXP (recog_data.operand[i], 0);
23720 return modified_in_p (addr, set_insn) != 0;
23726 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
23728 enum attr_type insn_type, dep_insn_type;
23729 enum attr_memory memory;
23731 int dep_insn_code_number;
23733 /* Anti and output dependencies have zero cost on all CPUs. */
23734 if (REG_NOTE_KIND (link) != 0)
23737 dep_insn_code_number = recog_memoized (dep_insn);
23739 /* If we can't recognize the insns, we can't really do anything. */
23740 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
23743 insn_type = get_attr_type (insn);
23744 dep_insn_type = get_attr_type (dep_insn);
23748 case PROCESSOR_PENTIUM:
23749 /* Address Generation Interlock adds a cycle of latency. */
23750 if (insn_type == TYPE_LEA)
23752 rtx addr = PATTERN (insn);
23754 if (GET_CODE (addr) == PARALLEL)
23755 addr = XVECEXP (addr, 0, 0);
23757 gcc_assert (GET_CODE (addr) == SET);
23759 addr = SET_SRC (addr);
23760 if (modified_in_p (addr, dep_insn))
23763 else if (ix86_agi_dependent (dep_insn, insn))
23766 /* ??? Compares pair with jump/setcc. */
23767 if (ix86_flags_dependent (insn, dep_insn, insn_type))
/* Floating point stores require the value to be ready one cycle earlier. */
23771 if (insn_type == TYPE_FMOV
23772 && get_attr_memory (insn) == MEMORY_STORE
23773 && !ix86_agi_dependent (dep_insn, insn))
23777 case PROCESSOR_PENTIUMPRO:
23778 memory = get_attr_memory (insn);
23780 /* INT->FP conversion is expensive. */
23781 if (get_attr_fp_int_src (dep_insn))
23784 /* There is one cycle extra latency between an FP op and a store. */
23785 if (insn_type == TYPE_FMOV
23786 && (set = single_set (dep_insn)) != NULL_RTX
23787 && (set2 = single_set (insn)) != NULL_RTX
23788 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
23789 && MEM_P (SET_DEST (set2)))
/* Show the reorder buffer's ability to hide the latency of a load by
   executing it in parallel with the previous instruction, when the
   previous instruction is not needed to compute the address. */
23795 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23796 && !ix86_agi_dependent (dep_insn, insn))
/* Claim moves take one cycle, as the core can issue one load
   at a time and the next load can start a cycle later. */
23800 if (dep_insn_type == TYPE_IMOV
23801 || dep_insn_type == TYPE_FMOV)
23809 memory = get_attr_memory (insn);
23811 /* The esp dependency is resolved before the instruction is really
23813 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
23814 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
23817 /* INT->FP conversion is expensive. */
23818 if (get_attr_fp_int_src (dep_insn))
/* Show the reorder buffer's ability to hide the latency of a load by
   executing it in parallel with the previous instruction, when the
   previous instruction is not needed to compute the address. */
23824 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23825 && !ix86_agi_dependent (dep_insn, insn))
/* Claim moves take one cycle, as the core can issue one load
   at a time and the next load can start a cycle later. */
23829 if (dep_insn_type == TYPE_IMOV
23830 || dep_insn_type == TYPE_FMOV)
23839 case PROCESSOR_ATHLON:
23841 case PROCESSOR_AMDFAM10:
23842 case PROCESSOR_BDVER1:
23843 case PROCESSOR_BDVER2:
23844 case PROCESSOR_BTVER1:
23845 case PROCESSOR_ATOM:
23846 case PROCESSOR_GENERIC32:
23847 case PROCESSOR_GENERIC64:
23848 memory = get_attr_memory (insn);
/* Show the reorder buffer's ability to hide the latency of a load by
   executing it in parallel with the previous instruction, when the
   previous instruction is not needed to compute the address. */
23853 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23854 && !ix86_agi_dependent (dep_insn, insn))
23856 enum attr_unit unit = get_attr_unit (insn);
/* Because of the difference in length between the integer and
   floating point unit pipeline preparation stages, the memory operands
   for floating point are cheaper.

   ??? For Athlon the difference is most probably 2. */
23864 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
23867 loadcost = TARGET_ATHLON ? 2 : 0;
23869 if (cost >= loadcost)
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler. */
23887 ia32_multipass_dfa_lookahead (void)
23891 case PROCESSOR_PENTIUM:
23894 case PROCESSOR_PENTIUMPRO:
23898 case PROCESSOR_CORE2_32:
23899 case PROCESSOR_CORE2_64:
23900 case PROCESSOR_COREI7_32:
23901 case PROCESSOR_COREI7_64:
23902 case PROCESSOR_ATOM:
/* Generally, we want haifa-sched:max_issue() to look ahead as far
   as the number of instructions that can be executed in a cycle,
   i.e., issue_rate.  I wonder why tuning for many CPUs does not do this. */
23906 return ix86_issue_rate ();
/* Model the decoder of Core 2/i7.
   The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0. */
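/* A host-side model (illustrative only; the name is hypothetical) of the
   constraint enforced by core2i7_first_cycle_multipass_filter_ready_try
   below, using the Core 2/i7 parameters installed in
   ix86_sched_init_global:  */
#if 0
static int
decoder_can_issue_p (int block_len, int block_n_insns, int insn_size,
                     int first_cycle_insn_p)
{
  if (!first_cycle_insn_p && insn_size > 8)   /* secondary decoder limit */
    return 0;
  if (block_len + insn_size > 16)             /* ifetch block size */
    return 0;
  if (block_n_insns + 1 > 6)                  /* decoders per cycle */
    return 0;
  return 1;
}
#endif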
23920 /* Maximum length of an insn that can be handled by
23921 a secondary decoder unit. '8' for Core 2/i7. */
23922 static int core2i7_secondary_decoder_max_insn_size;
/* Ifetch block size, i.e., the number of bytes the decoder reads per cycle.
   '16' for Core 2/i7. */
23926 static int core2i7_ifetch_block_size;
/* Maximum number of instructions the decoder can handle per cycle.
23929 '6' for Core 2/i7. */
23930 static int core2i7_ifetch_block_max_insns;
23932 typedef struct ix86_first_cycle_multipass_data_ *
23933 ix86_first_cycle_multipass_data_t;
23934 typedef const struct ix86_first_cycle_multipass_data_ *
23935 const_ix86_first_cycle_multipass_data_t;
23937 /* A variable to store target state across calls to max_issue within
23939 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
23940 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
23942 /* Initialize DATA. */
23944 core2i7_first_cycle_multipass_init (void *_data)
23946 ix86_first_cycle_multipass_data_t data
23947 = (ix86_first_cycle_multipass_data_t) _data;
23949 data->ifetch_block_len = 0;
23950 data->ifetch_block_n_insns = 0;
23951 data->ready_try_change = NULL;
23952 data->ready_try_change_size = 0;
23955 /* Advancing the cycle; reset ifetch block counts. */
23957 core2i7_dfa_post_advance_cycle (void)
23959 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
23961 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23963 data->ifetch_block_len = 0;
23964 data->ifetch_block_n_insns = 0;
23967 static int min_insn_size (rtx);
/* Filter out insns from ready_try that the core will not be able to issue
   on the current cycle due to decoder restrictions. */
23972 core2i7_first_cycle_multipass_filter_ready_try
23973 (const_ix86_first_cycle_multipass_data_t data,
23974 char *ready_try, int n_ready, bool first_cycle_insn_p)
23981 if (ready_try[n_ready])
23984 insn = get_ready_element (n_ready);
23985 insn_size = min_insn_size (insn);
if (/* If this is too long an insn for a secondary decoder ... */
23988 (!first_cycle_insn_p
23989 && insn_size > core2i7_secondary_decoder_max_insn_size)
23990 /* ... or it would not fit into the ifetch block ... */
23991 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
23992 /* ... or the decoder is full already ... */
23993 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
23994 /* ... mask the insn out. */
23996 ready_try[n_ready] = 1;
23998 if (data->ready_try_change)
23999 SET_BIT (data->ready_try_change, n_ready);
24004 /* Prepare for a new round of multipass lookahead scheduling. */
24006 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
24007 bool first_cycle_insn_p)
24009 ix86_first_cycle_multipass_data_t data
24010 = (ix86_first_cycle_multipass_data_t) _data;
24011 const_ix86_first_cycle_multipass_data_t prev_data
24012 = ix86_first_cycle_multipass_data;
24014 /* Restore the state from the end of the previous round. */
24015 data->ifetch_block_len = prev_data->ifetch_block_len;
24016 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
/* Filter instructions that cannot be issued on the current cycle due to
   decoder restrictions. */
24020 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
24021 first_cycle_insn_p);
/* INSN is being issued in the current solution.  Account for its impact on
   the decoder model. */
24027 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
24028 rtx insn, const void *_prev_data)
24030 ix86_first_cycle_multipass_data_t data
24031 = (ix86_first_cycle_multipass_data_t) _data;
24032 const_ix86_first_cycle_multipass_data_t prev_data
24033 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
24035 int insn_size = min_insn_size (insn);
24037 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
24038 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
24039 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
24040 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
24042 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
24043 if (!data->ready_try_change)
24045 data->ready_try_change = sbitmap_alloc (n_ready);
24046 data->ready_try_change_size = n_ready;
24048 else if (data->ready_try_change_size < n_ready)
24050 data->ready_try_change = sbitmap_resize (data->ready_try_change,
24052 data->ready_try_change_size = n_ready;
24054 sbitmap_zero (data->ready_try_change);
/* Filter out insns from ready_try that the core will not be able to issue
   on the current cycle due to decoder restrictions. */
24058 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
24062 /* Revert the effect on ready_try. */
24064 core2i7_first_cycle_multipass_backtrack (const void *_data,
24066 int n_ready ATTRIBUTE_UNUSED)
24068 const_ix86_first_cycle_multipass_data_t data
24069 = (const_ix86_first_cycle_multipass_data_t) _data;
24070 unsigned int i = 0;
24071 sbitmap_iterator sbi;
24073 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
24074 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
24080 /* Save the result of multipass lookahead scheduling for the next round. */
24082 core2i7_first_cycle_multipass_end (const void *_data)
24084 const_ix86_first_cycle_multipass_data_t data
24085 = (const_ix86_first_cycle_multipass_data_t) _data;
24086 ix86_first_cycle_multipass_data_t next_data
24087 = ix86_first_cycle_multipass_data;
24091 next_data->ifetch_block_len = data->ifetch_block_len;
24092 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
24096 /* Deallocate target data. */
24098 core2i7_first_cycle_multipass_fini (void *_data)
24100 ix86_first_cycle_multipass_data_t data
24101 = (ix86_first_cycle_multipass_data_t) _data;
24103 if (data->ready_try_change)
24105 sbitmap_free (data->ready_try_change);
24106 data->ready_try_change = NULL;
24107 data->ready_try_change_size = 0;
24111 /* Prepare for scheduling pass. */
24113 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
24114 int verbose ATTRIBUTE_UNUSED,
24115 int max_uid ATTRIBUTE_UNUSED)
24117 /* Install scheduling hooks for current CPU. Some of these hooks are used
24118 in time-critical parts of the scheduler, so we only set them up when
24119 they are actually used. */
24122 case PROCESSOR_CORE2_32:
24123 case PROCESSOR_CORE2_64:
24124 case PROCESSOR_COREI7_32:
24125 case PROCESSOR_COREI7_64:
24126 targetm.sched.dfa_post_advance_cycle
24127 = core2i7_dfa_post_advance_cycle;
24128 targetm.sched.first_cycle_multipass_init
24129 = core2i7_first_cycle_multipass_init;
24130 targetm.sched.first_cycle_multipass_begin
24131 = core2i7_first_cycle_multipass_begin;
24132 targetm.sched.first_cycle_multipass_issue
24133 = core2i7_first_cycle_multipass_issue;
24134 targetm.sched.first_cycle_multipass_backtrack
24135 = core2i7_first_cycle_multipass_backtrack;
24136 targetm.sched.first_cycle_multipass_end
24137 = core2i7_first_cycle_multipass_end;
24138 targetm.sched.first_cycle_multipass_fini
24139 = core2i7_first_cycle_multipass_fini;
24141 /* Set decoder parameters. */
24142 core2i7_secondary_decoder_max_insn_size = 8;
24143 core2i7_ifetch_block_size = 16;
      core2i7_ifetch_block_max_insns = 6;
      break;

    default:
24148 targetm.sched.dfa_post_advance_cycle = NULL;
24149 targetm.sched.first_cycle_multipass_init = NULL;
24150 targetm.sched.first_cycle_multipass_begin = NULL;
24151 targetm.sched.first_cycle_multipass_issue = NULL;
24152 targetm.sched.first_cycle_multipass_backtrack = NULL;
24153 targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
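/* Illustrative sketch (added commentary, not original GCC code): the decoder
   model configured above boils down to an admission check per instruction
   fetch block.  `fetch_len' and `n_insns' are stand-ins for the
   ifetch_block_len and ifetch_block_n_insns fields of the multipass data;
   the constants mirror the parameters set in ix86_sched_init_global.  */
#if 0	/* Example only.  */
static inline bool
fits_ifetch_block_p (int fetch_len, int n_insns, int insn_size)
{
  /* An insn can issue in the current fetch block only if the block stays
     within 16 bytes and at most 6 decoded instructions.  */
  return fetch_len + insn_size <= 16 && n_insns + 1 <= 6;
}
#endif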
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.  The value of this function is used instead of that
   alignment to align the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
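/* Worked example (added for illustration; not in the original sources):
   given the rules above, a file-scope `double' constant is placed with
   64-bit alignment even though ia32 only guarantees 32-bit alignment for
   DFmode data, and a long string constant is word-aligned when not
   optimizing for size:

     static const double half = 0.5;     -> 64-bit alignment
     "a string constant of thirty-one or more characters"
					 -> BITS_PER_WORD alignment  */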
24184 /* Compute the alignment for a static variable.
24185 TYPE is the data type, and ALIGN is the alignment that
24186 the object would ordinarily have. The value of this function is used
24187 instead of that alignment to align the object. */
int
ix86_data_alignment (tree type, int align)
{
  int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
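/* Worked example (added for illustration): on x86-64 the aggregate clause
   above is what lets the vectorizer assume aligned SSE access to any static
   array of 16 bytes or more:

     static char buf[32];   -> 128-bit (16-byte) alignment
     static char tiny[8];   -> below the 128-bit threshold, left as-is  */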
24252 /* Compute the alignment for a local variable or a stack slot. EXP is
24253 the data type or decl itself, MODE is the widest mode available and
24254 ALIGN is the alignment that the object would ordinarily have. The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }
  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;
  /* If TYPE is NULL, we are allocating a stack slot for a caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  The exact wording is:

       An array uses the same alignment as its elements, except that a
       local or global array variable of length at least 16 bytes or a
       C99 variable-length array variable always has alignment of at
       least 16 bytes.

     This rule was added to allow use of aligned SSE instructions on
     arrays.  It is meant for static storage, where the compiler cannot
     do the analysis by itself; we follow it for automatic variables
     only when convenient.  We fully control everything in the function
     being compiled, and functions from other units cannot rely on the
     alignment.

     Exclude the va_list type.  It is the common case of a local array
     where we cannot benefit from the alignment.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
24313 if (AGGREGATE_TYPE_P (type)
24314 && (va_list_type_node == NULL_TREE
24315 || (TYPE_MAIN_VARIANT (type)
24316 != TYPE_MAIN_VARIANT (va_list_type_node)))
24317 && TYPE_SIZE (type)
24318 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
24319 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
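/* Worked example (added for illustration): for automatic storage the
   16-byte rule is applied only when it is cheap, i.e. in speed-optimized
   64-bit code with SSE, and never to va_list:

     void f (void) { int v[8]; ... }          -> v gets 16-byte alignment
     void g (int n, ...) { va_list ap; ... }  -> ap is explicitly excluded  */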
24360 /* Compute the minimum required alignment for dynamic stack realignment
24361 purposes for a local variable, parameter or a stack slot. EXP is
24362 the data type or decl itself, MODE is its mode and ALIGN is the
24363 alignment that the object would ordinarily have. */
unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;
24385 /* Don't do dynamic stack realignment for long long objects with
24386 -mpreferred-stack-boundary=2. */
24387 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
24388 && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
24395 /* Find a location for the static chain incoming to a nested function.
24396 This is a register, unless all free registers are used by arguments. */
static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
24420 ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.  Thiscall functions use
	     ecx for arguments, which also leaves us with EAX for the
	     static chain.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
24431 /* For regparm 3, we have no free call-clobbered registers in
24432 which to store the static chain. In order to implement this,
24433 we have the trampoline push the static chain to the stack.
24434 However, we can't push a value below the return address when
24435 we call the nested function directly, so we have to use an
24436 alternate entry point. For this we use ESI, and have the
24437 alternate entry point push ESI, so that things appear the
24438 same once we're executing the nested function. */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl)
		ix86_static_chain_on_stack = true;
	      return gen_frame_mem (SImode,
				    plus_constant (arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
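/* Usage illustration (added commentary): the static chain only comes into
   play for GNU C nested functions, e.g.

     void outer (int x)
     {
       int nested (int y) { return x + y; }
       use_callback (nested);	// taking the address forces a trampoline
     }

   ix86_static_chain decides where `outer's frame address is handed to
   `nested': ECX by default, EAX for fastcall/thiscall, a stack slot via
   the ESI alternate entry point for regparm(3), and R10 in 64-bit mode.
   `use_callback' is a placeholder name for any consumer of the pointer.  */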
24453 /* Emit RTL insns to initialize the variable parts of a trampoline.
24454 FNDECL is the decl of the target address; M_TRAMP is a MEM for
24455 the trampoline, and CHAIN_VALUE is an RTX for the static chain
24456 to be passed to the target function. */
static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load the address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
24479 mem = adjust_address (m_tramp, HImode, offset);
24480 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
24482 mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
24488 mem = adjust_address (m_tramp, HImode, offset);
24489 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
24491 mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}
      /* Load the static chain using movabs to r10.  Use the
	 shorter movl instead of movabs for x32.  */
      if (TARGET_LP64)
	{
	  opcode = 0xba49;
	  size = 10;
	}
      else
	{
	  opcode = 0xba41;
	  size = 6;
	}
24509 mem = adjust_address (m_tramp, HImode, offset);
24510 emit_move_insn (mem, gen_int_mode (opcode, HImode));
24512 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;
24516 /* Jump to r11; the last (unused) byte is a nop, only there to
24517 pad the write out to a single 32-bit store. */
24518 mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;
24526 /* Depending on the static chain location, either load a register
24527 with a constant, or push the constant to the stack. All of the
24528 instructions are the same size. */
24529 chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;
24545 mem = adjust_address (m_tramp, QImode, offset);
24546 emit_move_insn (mem, gen_int_mode (opcode, QImode));
24548 mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;
24552 mem = adjust_address (m_tramp, QImode, offset);
24553 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
24555 mem = adjust_address (m_tramp, SImode, offset + 1);
24557 /* Compute offset from the end of the jmp to the target function.
24558 In the case in which the trampoline stores the static chain on
24559 the stack, we need to skip the first insn which pushes the
24560 (call-saved) register static chain; this push is 1 byte. */
24562 disp = expand_binop (SImode, sub_optab, fnaddr,
24563 plus_constant (XEXP (m_tramp, 0),
24564 offset - (MEM_P (chain) ? 1 : 0)),
24565 NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
      offset += 5;
    }
24569 gcc_assert (offset <= TRAMPOLINE_SIZE);
24571 #ifdef HAVE_ENABLE_EXECUTE_STACK
24572 #ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
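/* Resulting trampoline images (added commentary, derived from the stores
   above; shown for the common cases):

   32-bit, register static chain (ECX by default):
     0:  b9 <imm32>		movl   $chain_value, %ecx
     5:  e9 <rel32>		jmp    <function>

   64-bit LP64, function address fitting in 32 bits:
     0:  41 bb <imm32>		movl   $fnaddr, %r11d
     6:  49 ba <imm64>		movabs $chain_value, %r10
    16:  49 ff e3 90		jmp *%r11; nop (pads to a 32-bit store)  */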
24580 /* The following file contains several enumerations and data structures
24581 built from the definitions in i386-builtin-types.def. */
24583 #include "i386-builtin-types.inc"
24585 /* Table for the ix86 builtin non-function types. */
24586 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
24588 /* Retrieve an element from the above table, building some of
24589 the types lazily. */
static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;
24603 gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;
24608 index = tcode - IX86_BT_LAST_PRIM - 1;
24609 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
24610 mode = ix86_builtin_type_vect_mode[index];
      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;
24618 index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
	quals = TYPE_UNQUALIFIED;
      else
	quals = TYPE_QUAL_CONST;
24624 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
24625 if (quals != TYPE_UNQUALIFIED)
24626 itype = build_qualified_type (itype, quals);
      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
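/* Example (added for illustration): types are built on first use and then
   memoized, so repeated lookups are cheap and share one tree node:

     tree v4sf = ix86_get_builtin_type (IX86_BT_V4SF);
	-> built once with build_vector_type_for_mode from the float
	   element type and V4SFmode, then served from the table above

   Pointer codes are resolved the same way, adding a `const' qualifier
   for codes past IX86_BT_LAST_PTR.  */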
24635 /* Table for the ix86 builtin function types. */
24636 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
24638 /* Retrieve an element from the above table, building some of
24639 the types lazily. */
static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;
  if (tcode <= IX86_BT_LAST_FUNC)
    {
24654 unsigned start = ix86_builtin_func_start[(int) tcode];
24655 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;
24659 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
24662 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
24670 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
24671 enum ix86_builtin_func_type icode;
24673 icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
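/* Example (added for illustration): for a function type code whose slice of
   ix86_builtin_func_args is { V4SF, V4SF, V4SF }, the first element is the
   return type and the loop above conses the rest in reverse onto
   void_list_node, which is equivalent to

     build_function_type_list (V4SF_type_node, V4SF_type_node,
			       V4SF_type_node, NULL_TREE);

   (`V4SF_type_node' is shorthand here for the tree returned by
   ix86_get_builtin_type (IX86_BT_V4SF).)  */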
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
24685 IX86_BUILTIN_ADDPS,
24686 IX86_BUILTIN_ADDSS,
24687 IX86_BUILTIN_DIVPS,
24688 IX86_BUILTIN_DIVSS,
24689 IX86_BUILTIN_MULPS,
24690 IX86_BUILTIN_MULSS,
24691 IX86_BUILTIN_SUBPS,
24692 IX86_BUILTIN_SUBSS,
24694 IX86_BUILTIN_CMPEQPS,
24695 IX86_BUILTIN_CMPLTPS,
24696 IX86_BUILTIN_CMPLEPS,
24697 IX86_BUILTIN_CMPGTPS,
24698 IX86_BUILTIN_CMPGEPS,
24699 IX86_BUILTIN_CMPNEQPS,
24700 IX86_BUILTIN_CMPNLTPS,
24701 IX86_BUILTIN_CMPNLEPS,
24702 IX86_BUILTIN_CMPNGTPS,
24703 IX86_BUILTIN_CMPNGEPS,
24704 IX86_BUILTIN_CMPORDPS,
24705 IX86_BUILTIN_CMPUNORDPS,
24706 IX86_BUILTIN_CMPEQSS,
24707 IX86_BUILTIN_CMPLTSS,
24708 IX86_BUILTIN_CMPLESS,
24709 IX86_BUILTIN_CMPNEQSS,
24710 IX86_BUILTIN_CMPNLTSS,
24711 IX86_BUILTIN_CMPNLESS,
24712 IX86_BUILTIN_CMPNGTSS,
24713 IX86_BUILTIN_CMPNGESS,
24714 IX86_BUILTIN_CMPORDSS,
24715 IX86_BUILTIN_CMPUNORDSS,
24717 IX86_BUILTIN_COMIEQSS,
24718 IX86_BUILTIN_COMILTSS,
24719 IX86_BUILTIN_COMILESS,
24720 IX86_BUILTIN_COMIGTSS,
24721 IX86_BUILTIN_COMIGESS,
24722 IX86_BUILTIN_COMINEQSS,
24723 IX86_BUILTIN_UCOMIEQSS,
24724 IX86_BUILTIN_UCOMILTSS,
24725 IX86_BUILTIN_UCOMILESS,
24726 IX86_BUILTIN_UCOMIGTSS,
24727 IX86_BUILTIN_UCOMIGESS,
24728 IX86_BUILTIN_UCOMINEQSS,
24730 IX86_BUILTIN_CVTPI2PS,
24731 IX86_BUILTIN_CVTPS2PI,
24732 IX86_BUILTIN_CVTSI2SS,
24733 IX86_BUILTIN_CVTSI642SS,
24734 IX86_BUILTIN_CVTSS2SI,
24735 IX86_BUILTIN_CVTSS2SI64,
24736 IX86_BUILTIN_CVTTPS2PI,
24737 IX86_BUILTIN_CVTTSS2SI,
24738 IX86_BUILTIN_CVTTSS2SI64,
24740 IX86_BUILTIN_MAXPS,
24741 IX86_BUILTIN_MAXSS,
24742 IX86_BUILTIN_MINPS,
24743 IX86_BUILTIN_MINSS,
24745 IX86_BUILTIN_LOADUPS,
24746 IX86_BUILTIN_STOREUPS,
24747 IX86_BUILTIN_MOVSS,
24749 IX86_BUILTIN_MOVHLPS,
24750 IX86_BUILTIN_MOVLHPS,
24751 IX86_BUILTIN_LOADHPS,
24752 IX86_BUILTIN_LOADLPS,
24753 IX86_BUILTIN_STOREHPS,
24754 IX86_BUILTIN_STORELPS,
24756 IX86_BUILTIN_MASKMOVQ,
24757 IX86_BUILTIN_MOVMSKPS,
24758 IX86_BUILTIN_PMOVMSKB,
24760 IX86_BUILTIN_MOVNTPS,
24761 IX86_BUILTIN_MOVNTQ,
24763 IX86_BUILTIN_LOADDQU,
24764 IX86_BUILTIN_STOREDQU,
24766 IX86_BUILTIN_PACKSSWB,
24767 IX86_BUILTIN_PACKSSDW,
24768 IX86_BUILTIN_PACKUSWB,
24770 IX86_BUILTIN_PADDB,
24771 IX86_BUILTIN_PADDW,
24772 IX86_BUILTIN_PADDD,
24773 IX86_BUILTIN_PADDQ,
24774 IX86_BUILTIN_PADDSB,
24775 IX86_BUILTIN_PADDSW,
24776 IX86_BUILTIN_PADDUSB,
24777 IX86_BUILTIN_PADDUSW,
24778 IX86_BUILTIN_PSUBB,
24779 IX86_BUILTIN_PSUBW,
24780 IX86_BUILTIN_PSUBD,
24781 IX86_BUILTIN_PSUBQ,
24782 IX86_BUILTIN_PSUBSB,
24783 IX86_BUILTIN_PSUBSW,
24784 IX86_BUILTIN_PSUBUSB,
24785 IX86_BUILTIN_PSUBUSW,
24788 IX86_BUILTIN_PANDN,
24792 IX86_BUILTIN_PAVGB,
24793 IX86_BUILTIN_PAVGW,
24795 IX86_BUILTIN_PCMPEQB,
24796 IX86_BUILTIN_PCMPEQW,
24797 IX86_BUILTIN_PCMPEQD,
24798 IX86_BUILTIN_PCMPGTB,
24799 IX86_BUILTIN_PCMPGTW,
24800 IX86_BUILTIN_PCMPGTD,
24802 IX86_BUILTIN_PMADDWD,
24804 IX86_BUILTIN_PMAXSW,
24805 IX86_BUILTIN_PMAXUB,
24806 IX86_BUILTIN_PMINSW,
24807 IX86_BUILTIN_PMINUB,
24809 IX86_BUILTIN_PMULHUW,
24810 IX86_BUILTIN_PMULHW,
24811 IX86_BUILTIN_PMULLW,
24813 IX86_BUILTIN_PSADBW,
24814 IX86_BUILTIN_PSHUFW,
24816 IX86_BUILTIN_PSLLW,
24817 IX86_BUILTIN_PSLLD,
24818 IX86_BUILTIN_PSLLQ,
24819 IX86_BUILTIN_PSRAW,
24820 IX86_BUILTIN_PSRAD,
24821 IX86_BUILTIN_PSRLW,
24822 IX86_BUILTIN_PSRLD,
24823 IX86_BUILTIN_PSRLQ,
24824 IX86_BUILTIN_PSLLWI,
24825 IX86_BUILTIN_PSLLDI,
24826 IX86_BUILTIN_PSLLQI,
24827 IX86_BUILTIN_PSRAWI,
24828 IX86_BUILTIN_PSRADI,
24829 IX86_BUILTIN_PSRLWI,
24830 IX86_BUILTIN_PSRLDI,
24831 IX86_BUILTIN_PSRLQI,
24833 IX86_BUILTIN_PUNPCKHBW,
24834 IX86_BUILTIN_PUNPCKHWD,
24835 IX86_BUILTIN_PUNPCKHDQ,
24836 IX86_BUILTIN_PUNPCKLBW,
24837 IX86_BUILTIN_PUNPCKLWD,
24838 IX86_BUILTIN_PUNPCKLDQ,
24840 IX86_BUILTIN_SHUFPS,
24842 IX86_BUILTIN_RCPPS,
24843 IX86_BUILTIN_RCPSS,
24844 IX86_BUILTIN_RSQRTPS,
24845 IX86_BUILTIN_RSQRTPS_NR,
24846 IX86_BUILTIN_RSQRTSS,
24847 IX86_BUILTIN_RSQRTF,
24848 IX86_BUILTIN_SQRTPS,
24849 IX86_BUILTIN_SQRTPS_NR,
24850 IX86_BUILTIN_SQRTSS,
24852 IX86_BUILTIN_UNPCKHPS,
24853 IX86_BUILTIN_UNPCKLPS,
24855 IX86_BUILTIN_ANDPS,
24856 IX86_BUILTIN_ANDNPS,
24858 IX86_BUILTIN_XORPS,
24861 IX86_BUILTIN_LDMXCSR,
24862 IX86_BUILTIN_STMXCSR,
24863 IX86_BUILTIN_SFENCE,
24865 /* 3DNow! Original */
24866 IX86_BUILTIN_FEMMS,
24867 IX86_BUILTIN_PAVGUSB,
24868 IX86_BUILTIN_PF2ID,
24869 IX86_BUILTIN_PFACC,
24870 IX86_BUILTIN_PFADD,
24871 IX86_BUILTIN_PFCMPEQ,
24872 IX86_BUILTIN_PFCMPGE,
24873 IX86_BUILTIN_PFCMPGT,
24874 IX86_BUILTIN_PFMAX,
24875 IX86_BUILTIN_PFMIN,
24876 IX86_BUILTIN_PFMUL,
24877 IX86_BUILTIN_PFRCP,
24878 IX86_BUILTIN_PFRCPIT1,
24879 IX86_BUILTIN_PFRCPIT2,
24880 IX86_BUILTIN_PFRSQIT1,
24881 IX86_BUILTIN_PFRSQRT,
24882 IX86_BUILTIN_PFSUB,
24883 IX86_BUILTIN_PFSUBR,
24884 IX86_BUILTIN_PI2FD,
24885 IX86_BUILTIN_PMULHRW,
24887 /* 3DNow! Athlon Extensions */
24888 IX86_BUILTIN_PF2IW,
24889 IX86_BUILTIN_PFNACC,
24890 IX86_BUILTIN_PFPNACC,
24891 IX86_BUILTIN_PI2FW,
24892 IX86_BUILTIN_PSWAPDSI,
24893 IX86_BUILTIN_PSWAPDSF,
24896 IX86_BUILTIN_ADDPD,
24897 IX86_BUILTIN_ADDSD,
24898 IX86_BUILTIN_DIVPD,
24899 IX86_BUILTIN_DIVSD,
24900 IX86_BUILTIN_MULPD,
24901 IX86_BUILTIN_MULSD,
24902 IX86_BUILTIN_SUBPD,
24903 IX86_BUILTIN_SUBSD,
24905 IX86_BUILTIN_CMPEQPD,
24906 IX86_BUILTIN_CMPLTPD,
24907 IX86_BUILTIN_CMPLEPD,
24908 IX86_BUILTIN_CMPGTPD,
24909 IX86_BUILTIN_CMPGEPD,
24910 IX86_BUILTIN_CMPNEQPD,
24911 IX86_BUILTIN_CMPNLTPD,
24912 IX86_BUILTIN_CMPNLEPD,
24913 IX86_BUILTIN_CMPNGTPD,
24914 IX86_BUILTIN_CMPNGEPD,
24915 IX86_BUILTIN_CMPORDPD,
24916 IX86_BUILTIN_CMPUNORDPD,
24917 IX86_BUILTIN_CMPEQSD,
24918 IX86_BUILTIN_CMPLTSD,
24919 IX86_BUILTIN_CMPLESD,
24920 IX86_BUILTIN_CMPNEQSD,
24921 IX86_BUILTIN_CMPNLTSD,
24922 IX86_BUILTIN_CMPNLESD,
24923 IX86_BUILTIN_CMPORDSD,
24924 IX86_BUILTIN_CMPUNORDSD,
24926 IX86_BUILTIN_COMIEQSD,
24927 IX86_BUILTIN_COMILTSD,
24928 IX86_BUILTIN_COMILESD,
24929 IX86_BUILTIN_COMIGTSD,
24930 IX86_BUILTIN_COMIGESD,
24931 IX86_BUILTIN_COMINEQSD,
24932 IX86_BUILTIN_UCOMIEQSD,
24933 IX86_BUILTIN_UCOMILTSD,
24934 IX86_BUILTIN_UCOMILESD,
24935 IX86_BUILTIN_UCOMIGTSD,
24936 IX86_BUILTIN_UCOMIGESD,
24937 IX86_BUILTIN_UCOMINEQSD,
24939 IX86_BUILTIN_MAXPD,
24940 IX86_BUILTIN_MAXSD,
24941 IX86_BUILTIN_MINPD,
24942 IX86_BUILTIN_MINSD,
24944 IX86_BUILTIN_ANDPD,
24945 IX86_BUILTIN_ANDNPD,
24947 IX86_BUILTIN_XORPD,
24949 IX86_BUILTIN_SQRTPD,
24950 IX86_BUILTIN_SQRTSD,
24952 IX86_BUILTIN_UNPCKHPD,
24953 IX86_BUILTIN_UNPCKLPD,
24955 IX86_BUILTIN_SHUFPD,
24957 IX86_BUILTIN_LOADUPD,
24958 IX86_BUILTIN_STOREUPD,
24959 IX86_BUILTIN_MOVSD,
24961 IX86_BUILTIN_LOADHPD,
24962 IX86_BUILTIN_LOADLPD,
24964 IX86_BUILTIN_CVTDQ2PD,
24965 IX86_BUILTIN_CVTDQ2PS,
24967 IX86_BUILTIN_CVTPD2DQ,
24968 IX86_BUILTIN_CVTPD2PI,
24969 IX86_BUILTIN_CVTPD2PS,
24970 IX86_BUILTIN_CVTTPD2DQ,
24971 IX86_BUILTIN_CVTTPD2PI,
24973 IX86_BUILTIN_CVTPI2PD,
24974 IX86_BUILTIN_CVTSI2SD,
24975 IX86_BUILTIN_CVTSI642SD,
24977 IX86_BUILTIN_CVTSD2SI,
24978 IX86_BUILTIN_CVTSD2SI64,
24979 IX86_BUILTIN_CVTSD2SS,
24980 IX86_BUILTIN_CVTSS2SD,
24981 IX86_BUILTIN_CVTTSD2SI,
24982 IX86_BUILTIN_CVTTSD2SI64,
24984 IX86_BUILTIN_CVTPS2DQ,
24985 IX86_BUILTIN_CVTPS2PD,
24986 IX86_BUILTIN_CVTTPS2DQ,
24988 IX86_BUILTIN_MOVNTI,
24989 IX86_BUILTIN_MOVNTI64,
24990 IX86_BUILTIN_MOVNTPD,
24991 IX86_BUILTIN_MOVNTDQ,
24993 IX86_BUILTIN_MOVQ128,
24996 IX86_BUILTIN_MASKMOVDQU,
24997 IX86_BUILTIN_MOVMSKPD,
24998 IX86_BUILTIN_PMOVMSKB128,
25000 IX86_BUILTIN_PACKSSWB128,
25001 IX86_BUILTIN_PACKSSDW128,
25002 IX86_BUILTIN_PACKUSWB128,
25004 IX86_BUILTIN_PADDB128,
25005 IX86_BUILTIN_PADDW128,
25006 IX86_BUILTIN_PADDD128,
25007 IX86_BUILTIN_PADDQ128,
25008 IX86_BUILTIN_PADDSB128,
25009 IX86_BUILTIN_PADDSW128,
25010 IX86_BUILTIN_PADDUSB128,
25011 IX86_BUILTIN_PADDUSW128,
25012 IX86_BUILTIN_PSUBB128,
25013 IX86_BUILTIN_PSUBW128,
25014 IX86_BUILTIN_PSUBD128,
25015 IX86_BUILTIN_PSUBQ128,
25016 IX86_BUILTIN_PSUBSB128,
25017 IX86_BUILTIN_PSUBSW128,
25018 IX86_BUILTIN_PSUBUSB128,
25019 IX86_BUILTIN_PSUBUSW128,
25021 IX86_BUILTIN_PAND128,
25022 IX86_BUILTIN_PANDN128,
25023 IX86_BUILTIN_POR128,
25024 IX86_BUILTIN_PXOR128,
25026 IX86_BUILTIN_PAVGB128,
25027 IX86_BUILTIN_PAVGW128,
25029 IX86_BUILTIN_PCMPEQB128,
25030 IX86_BUILTIN_PCMPEQW128,
25031 IX86_BUILTIN_PCMPEQD128,
25032 IX86_BUILTIN_PCMPGTB128,
25033 IX86_BUILTIN_PCMPGTW128,
25034 IX86_BUILTIN_PCMPGTD128,
25036 IX86_BUILTIN_PMADDWD128,
25038 IX86_BUILTIN_PMAXSW128,
25039 IX86_BUILTIN_PMAXUB128,
25040 IX86_BUILTIN_PMINSW128,
25041 IX86_BUILTIN_PMINUB128,
25043 IX86_BUILTIN_PMULUDQ,
25044 IX86_BUILTIN_PMULUDQ128,
25045 IX86_BUILTIN_PMULHUW128,
25046 IX86_BUILTIN_PMULHW128,
25047 IX86_BUILTIN_PMULLW128,
25049 IX86_BUILTIN_PSADBW128,
25050 IX86_BUILTIN_PSHUFHW,
25051 IX86_BUILTIN_PSHUFLW,
25052 IX86_BUILTIN_PSHUFD,
25054 IX86_BUILTIN_PSLLDQI128,
25055 IX86_BUILTIN_PSLLWI128,
25056 IX86_BUILTIN_PSLLDI128,
25057 IX86_BUILTIN_PSLLQI128,
25058 IX86_BUILTIN_PSRAWI128,
25059 IX86_BUILTIN_PSRADI128,
25060 IX86_BUILTIN_PSRLDQI128,
25061 IX86_BUILTIN_PSRLWI128,
25062 IX86_BUILTIN_PSRLDI128,
25063 IX86_BUILTIN_PSRLQI128,
25065 IX86_BUILTIN_PSLLDQ128,
25066 IX86_BUILTIN_PSLLW128,
25067 IX86_BUILTIN_PSLLD128,
25068 IX86_BUILTIN_PSLLQ128,
25069 IX86_BUILTIN_PSRAW128,
25070 IX86_BUILTIN_PSRAD128,
25071 IX86_BUILTIN_PSRLW128,
25072 IX86_BUILTIN_PSRLD128,
25073 IX86_BUILTIN_PSRLQ128,
25075 IX86_BUILTIN_PUNPCKHBW128,
25076 IX86_BUILTIN_PUNPCKHWD128,
25077 IX86_BUILTIN_PUNPCKHDQ128,
25078 IX86_BUILTIN_PUNPCKHQDQ128,
25079 IX86_BUILTIN_PUNPCKLBW128,
25080 IX86_BUILTIN_PUNPCKLWD128,
25081 IX86_BUILTIN_PUNPCKLDQ128,
25082 IX86_BUILTIN_PUNPCKLQDQ128,
25084 IX86_BUILTIN_CLFLUSH,
25085 IX86_BUILTIN_MFENCE,
25086 IX86_BUILTIN_LFENCE,
25087 IX86_BUILTIN_PAUSE,
25089 IX86_BUILTIN_BSRSI,
25090 IX86_BUILTIN_BSRDI,
25091 IX86_BUILTIN_RDPMC,
25092 IX86_BUILTIN_RDTSC,
25093 IX86_BUILTIN_RDTSCP,
25094 IX86_BUILTIN_ROLQI,
25095 IX86_BUILTIN_ROLHI,
25096 IX86_BUILTIN_RORQI,
25097 IX86_BUILTIN_RORHI,
25100 IX86_BUILTIN_ADDSUBPS,
25101 IX86_BUILTIN_HADDPS,
25102 IX86_BUILTIN_HSUBPS,
25103 IX86_BUILTIN_MOVSHDUP,
25104 IX86_BUILTIN_MOVSLDUP,
25105 IX86_BUILTIN_ADDSUBPD,
25106 IX86_BUILTIN_HADDPD,
25107 IX86_BUILTIN_HSUBPD,
25108 IX86_BUILTIN_LDDQU,
25110 IX86_BUILTIN_MONITOR,
25111 IX86_BUILTIN_MWAIT,
25114 IX86_BUILTIN_PHADDW,
25115 IX86_BUILTIN_PHADDD,
25116 IX86_BUILTIN_PHADDSW,
25117 IX86_BUILTIN_PHSUBW,
25118 IX86_BUILTIN_PHSUBD,
25119 IX86_BUILTIN_PHSUBSW,
25120 IX86_BUILTIN_PMADDUBSW,
25121 IX86_BUILTIN_PMULHRSW,
25122 IX86_BUILTIN_PSHUFB,
25123 IX86_BUILTIN_PSIGNB,
25124 IX86_BUILTIN_PSIGNW,
25125 IX86_BUILTIN_PSIGND,
25126 IX86_BUILTIN_PALIGNR,
25127 IX86_BUILTIN_PABSB,
25128 IX86_BUILTIN_PABSW,
25129 IX86_BUILTIN_PABSD,
25131 IX86_BUILTIN_PHADDW128,
25132 IX86_BUILTIN_PHADDD128,
25133 IX86_BUILTIN_PHADDSW128,
25134 IX86_BUILTIN_PHSUBW128,
25135 IX86_BUILTIN_PHSUBD128,
25136 IX86_BUILTIN_PHSUBSW128,
25137 IX86_BUILTIN_PMADDUBSW128,
25138 IX86_BUILTIN_PMULHRSW128,
25139 IX86_BUILTIN_PSHUFB128,
25140 IX86_BUILTIN_PSIGNB128,
25141 IX86_BUILTIN_PSIGNW128,
25142 IX86_BUILTIN_PSIGND128,
25143 IX86_BUILTIN_PALIGNR128,
25144 IX86_BUILTIN_PABSB128,
25145 IX86_BUILTIN_PABSW128,
25146 IX86_BUILTIN_PABSD128,
25148 /* AMDFAM10 - SSE4A New Instructions. */
25149 IX86_BUILTIN_MOVNTSD,
25150 IX86_BUILTIN_MOVNTSS,
25151 IX86_BUILTIN_EXTRQI,
25152 IX86_BUILTIN_EXTRQ,
25153 IX86_BUILTIN_INSERTQI,
25154 IX86_BUILTIN_INSERTQ,
25157 IX86_BUILTIN_BLENDPD,
25158 IX86_BUILTIN_BLENDPS,
25159 IX86_BUILTIN_BLENDVPD,
25160 IX86_BUILTIN_BLENDVPS,
25161 IX86_BUILTIN_PBLENDVB128,
25162 IX86_BUILTIN_PBLENDW128,
25167 IX86_BUILTIN_INSERTPS128,
25169 IX86_BUILTIN_MOVNTDQA,
25170 IX86_BUILTIN_MPSADBW128,
25171 IX86_BUILTIN_PACKUSDW128,
25172 IX86_BUILTIN_PCMPEQQ,
25173 IX86_BUILTIN_PHMINPOSUW128,
25175 IX86_BUILTIN_PMAXSB128,
25176 IX86_BUILTIN_PMAXSD128,
25177 IX86_BUILTIN_PMAXUD128,
25178 IX86_BUILTIN_PMAXUW128,
25180 IX86_BUILTIN_PMINSB128,
25181 IX86_BUILTIN_PMINSD128,
25182 IX86_BUILTIN_PMINUD128,
25183 IX86_BUILTIN_PMINUW128,
25185 IX86_BUILTIN_PMOVSXBW128,
25186 IX86_BUILTIN_PMOVSXBD128,
25187 IX86_BUILTIN_PMOVSXBQ128,
25188 IX86_BUILTIN_PMOVSXWD128,
25189 IX86_BUILTIN_PMOVSXWQ128,
25190 IX86_BUILTIN_PMOVSXDQ128,
25192 IX86_BUILTIN_PMOVZXBW128,
25193 IX86_BUILTIN_PMOVZXBD128,
25194 IX86_BUILTIN_PMOVZXBQ128,
25195 IX86_BUILTIN_PMOVZXWD128,
25196 IX86_BUILTIN_PMOVZXWQ128,
25197 IX86_BUILTIN_PMOVZXDQ128,
25199 IX86_BUILTIN_PMULDQ128,
25200 IX86_BUILTIN_PMULLD128,
25202 IX86_BUILTIN_ROUNDSD,
25203 IX86_BUILTIN_ROUNDSS,
25205 IX86_BUILTIN_ROUNDPD,
25206 IX86_BUILTIN_ROUNDPS,
25208 IX86_BUILTIN_FLOORPD,
25209 IX86_BUILTIN_CEILPD,
25210 IX86_BUILTIN_TRUNCPD,
25211 IX86_BUILTIN_RINTPD,
25212 IX86_BUILTIN_ROUNDPD_AZ,
25214 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
25215 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
25216 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
25218 IX86_BUILTIN_FLOORPS,
25219 IX86_BUILTIN_CEILPS,
25220 IX86_BUILTIN_TRUNCPS,
25221 IX86_BUILTIN_RINTPS,
25222 IX86_BUILTIN_ROUNDPS_AZ,
25224 IX86_BUILTIN_FLOORPS_SFIX,
25225 IX86_BUILTIN_CEILPS_SFIX,
25226 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
25228 IX86_BUILTIN_PTESTZ,
25229 IX86_BUILTIN_PTESTC,
25230 IX86_BUILTIN_PTESTNZC,
25232 IX86_BUILTIN_VEC_INIT_V2SI,
25233 IX86_BUILTIN_VEC_INIT_V4HI,
25234 IX86_BUILTIN_VEC_INIT_V8QI,
25235 IX86_BUILTIN_VEC_EXT_V2DF,
25236 IX86_BUILTIN_VEC_EXT_V2DI,
25237 IX86_BUILTIN_VEC_EXT_V4SF,
25238 IX86_BUILTIN_VEC_EXT_V4SI,
25239 IX86_BUILTIN_VEC_EXT_V8HI,
25240 IX86_BUILTIN_VEC_EXT_V2SI,
25241 IX86_BUILTIN_VEC_EXT_V4HI,
25242 IX86_BUILTIN_VEC_EXT_V16QI,
25243 IX86_BUILTIN_VEC_SET_V2DI,
25244 IX86_BUILTIN_VEC_SET_V4SF,
25245 IX86_BUILTIN_VEC_SET_V4SI,
25246 IX86_BUILTIN_VEC_SET_V8HI,
25247 IX86_BUILTIN_VEC_SET_V4HI,
25248 IX86_BUILTIN_VEC_SET_V16QI,
25250 IX86_BUILTIN_VEC_PACK_SFIX,
25251 IX86_BUILTIN_VEC_PACK_SFIX256,
25254 IX86_BUILTIN_CRC32QI,
25255 IX86_BUILTIN_CRC32HI,
25256 IX86_BUILTIN_CRC32SI,
25257 IX86_BUILTIN_CRC32DI,
25259 IX86_BUILTIN_PCMPESTRI128,
25260 IX86_BUILTIN_PCMPESTRM128,
25261 IX86_BUILTIN_PCMPESTRA128,
25262 IX86_BUILTIN_PCMPESTRC128,
25263 IX86_BUILTIN_PCMPESTRO128,
25264 IX86_BUILTIN_PCMPESTRS128,
25265 IX86_BUILTIN_PCMPESTRZ128,
25266 IX86_BUILTIN_PCMPISTRI128,
25267 IX86_BUILTIN_PCMPISTRM128,
25268 IX86_BUILTIN_PCMPISTRA128,
25269 IX86_BUILTIN_PCMPISTRC128,
25270 IX86_BUILTIN_PCMPISTRO128,
25271 IX86_BUILTIN_PCMPISTRS128,
25272 IX86_BUILTIN_PCMPISTRZ128,
25274 IX86_BUILTIN_PCMPGTQ,
25276 /* AES instructions */
25277 IX86_BUILTIN_AESENC128,
25278 IX86_BUILTIN_AESENCLAST128,
25279 IX86_BUILTIN_AESDEC128,
25280 IX86_BUILTIN_AESDECLAST128,
25281 IX86_BUILTIN_AESIMC128,
25282 IX86_BUILTIN_AESKEYGENASSIST128,
25284 /* PCLMUL instruction */
25285 IX86_BUILTIN_PCLMULQDQ128,
25288 IX86_BUILTIN_ADDPD256,
25289 IX86_BUILTIN_ADDPS256,
25290 IX86_BUILTIN_ADDSUBPD256,
25291 IX86_BUILTIN_ADDSUBPS256,
25292 IX86_BUILTIN_ANDPD256,
25293 IX86_BUILTIN_ANDPS256,
25294 IX86_BUILTIN_ANDNPD256,
25295 IX86_BUILTIN_ANDNPS256,
25296 IX86_BUILTIN_BLENDPD256,
25297 IX86_BUILTIN_BLENDPS256,
25298 IX86_BUILTIN_BLENDVPD256,
25299 IX86_BUILTIN_BLENDVPS256,
25300 IX86_BUILTIN_DIVPD256,
25301 IX86_BUILTIN_DIVPS256,
25302 IX86_BUILTIN_DPPS256,
25303 IX86_BUILTIN_HADDPD256,
25304 IX86_BUILTIN_HADDPS256,
25305 IX86_BUILTIN_HSUBPD256,
25306 IX86_BUILTIN_HSUBPS256,
25307 IX86_BUILTIN_MAXPD256,
25308 IX86_BUILTIN_MAXPS256,
25309 IX86_BUILTIN_MINPD256,
25310 IX86_BUILTIN_MINPS256,
25311 IX86_BUILTIN_MULPD256,
25312 IX86_BUILTIN_MULPS256,
25313 IX86_BUILTIN_ORPD256,
25314 IX86_BUILTIN_ORPS256,
25315 IX86_BUILTIN_SHUFPD256,
25316 IX86_BUILTIN_SHUFPS256,
25317 IX86_BUILTIN_SUBPD256,
25318 IX86_BUILTIN_SUBPS256,
25319 IX86_BUILTIN_XORPD256,
25320 IX86_BUILTIN_XORPS256,
25321 IX86_BUILTIN_CMPSD,
25322 IX86_BUILTIN_CMPSS,
25323 IX86_BUILTIN_CMPPD,
25324 IX86_BUILTIN_CMPPS,
25325 IX86_BUILTIN_CMPPD256,
25326 IX86_BUILTIN_CMPPS256,
25327 IX86_BUILTIN_CVTDQ2PD256,
25328 IX86_BUILTIN_CVTDQ2PS256,
25329 IX86_BUILTIN_CVTPD2PS256,
25330 IX86_BUILTIN_CVTPS2DQ256,
25331 IX86_BUILTIN_CVTPS2PD256,
25332 IX86_BUILTIN_CVTTPD2DQ256,
25333 IX86_BUILTIN_CVTPD2DQ256,
25334 IX86_BUILTIN_CVTTPS2DQ256,
25335 IX86_BUILTIN_EXTRACTF128PD256,
25336 IX86_BUILTIN_EXTRACTF128PS256,
25337 IX86_BUILTIN_EXTRACTF128SI256,
25338 IX86_BUILTIN_VZEROALL,
25339 IX86_BUILTIN_VZEROUPPER,
25340 IX86_BUILTIN_VPERMILVARPD,
25341 IX86_BUILTIN_VPERMILVARPS,
25342 IX86_BUILTIN_VPERMILVARPD256,
25343 IX86_BUILTIN_VPERMILVARPS256,
25344 IX86_BUILTIN_VPERMILPD,
25345 IX86_BUILTIN_VPERMILPS,
25346 IX86_BUILTIN_VPERMILPD256,
25347 IX86_BUILTIN_VPERMILPS256,
25348 IX86_BUILTIN_VPERMIL2PD,
25349 IX86_BUILTIN_VPERMIL2PS,
25350 IX86_BUILTIN_VPERMIL2PD256,
25351 IX86_BUILTIN_VPERMIL2PS256,
25352 IX86_BUILTIN_VPERM2F128PD256,
25353 IX86_BUILTIN_VPERM2F128PS256,
25354 IX86_BUILTIN_VPERM2F128SI256,
25355 IX86_BUILTIN_VBROADCASTSS,
25356 IX86_BUILTIN_VBROADCASTSD256,
25357 IX86_BUILTIN_VBROADCASTSS256,
25358 IX86_BUILTIN_VBROADCASTPD256,
25359 IX86_BUILTIN_VBROADCASTPS256,
25360 IX86_BUILTIN_VINSERTF128PD256,
25361 IX86_BUILTIN_VINSERTF128PS256,
25362 IX86_BUILTIN_VINSERTF128SI256,
25363 IX86_BUILTIN_LOADUPD256,
25364 IX86_BUILTIN_LOADUPS256,
25365 IX86_BUILTIN_STOREUPD256,
25366 IX86_BUILTIN_STOREUPS256,
25367 IX86_BUILTIN_LDDQU256,
25368 IX86_BUILTIN_MOVNTDQ256,
25369 IX86_BUILTIN_MOVNTPD256,
25370 IX86_BUILTIN_MOVNTPS256,
25371 IX86_BUILTIN_LOADDQU256,
25372 IX86_BUILTIN_STOREDQU256,
25373 IX86_BUILTIN_MASKLOADPD,
25374 IX86_BUILTIN_MASKLOADPS,
25375 IX86_BUILTIN_MASKSTOREPD,
25376 IX86_BUILTIN_MASKSTOREPS,
25377 IX86_BUILTIN_MASKLOADPD256,
25378 IX86_BUILTIN_MASKLOADPS256,
25379 IX86_BUILTIN_MASKSTOREPD256,
25380 IX86_BUILTIN_MASKSTOREPS256,
25381 IX86_BUILTIN_MOVSHDUP256,
25382 IX86_BUILTIN_MOVSLDUP256,
25383 IX86_BUILTIN_MOVDDUP256,
25385 IX86_BUILTIN_SQRTPD256,
25386 IX86_BUILTIN_SQRTPS256,
25387 IX86_BUILTIN_SQRTPS_NR256,
25388 IX86_BUILTIN_RSQRTPS256,
25389 IX86_BUILTIN_RSQRTPS_NR256,
25391 IX86_BUILTIN_RCPPS256,
25393 IX86_BUILTIN_ROUNDPD256,
25394 IX86_BUILTIN_ROUNDPS256,
25396 IX86_BUILTIN_FLOORPD256,
25397 IX86_BUILTIN_CEILPD256,
25398 IX86_BUILTIN_TRUNCPD256,
25399 IX86_BUILTIN_RINTPD256,
25400 IX86_BUILTIN_ROUNDPD_AZ256,
25402 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
25403 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
25404 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
25406 IX86_BUILTIN_FLOORPS256,
25407 IX86_BUILTIN_CEILPS256,
25408 IX86_BUILTIN_TRUNCPS256,
25409 IX86_BUILTIN_RINTPS256,
25410 IX86_BUILTIN_ROUNDPS_AZ256,
25412 IX86_BUILTIN_FLOORPS_SFIX256,
25413 IX86_BUILTIN_CEILPS_SFIX256,
25414 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
25416 IX86_BUILTIN_UNPCKHPD256,
25417 IX86_BUILTIN_UNPCKLPD256,
25418 IX86_BUILTIN_UNPCKHPS256,
25419 IX86_BUILTIN_UNPCKLPS256,
25421 IX86_BUILTIN_SI256_SI,
25422 IX86_BUILTIN_PS256_PS,
25423 IX86_BUILTIN_PD256_PD,
25424 IX86_BUILTIN_SI_SI256,
25425 IX86_BUILTIN_PS_PS256,
25426 IX86_BUILTIN_PD_PD256,
25428 IX86_BUILTIN_VTESTZPD,
25429 IX86_BUILTIN_VTESTCPD,
25430 IX86_BUILTIN_VTESTNZCPD,
25431 IX86_BUILTIN_VTESTZPS,
25432 IX86_BUILTIN_VTESTCPS,
25433 IX86_BUILTIN_VTESTNZCPS,
25434 IX86_BUILTIN_VTESTZPD256,
25435 IX86_BUILTIN_VTESTCPD256,
25436 IX86_BUILTIN_VTESTNZCPD256,
25437 IX86_BUILTIN_VTESTZPS256,
25438 IX86_BUILTIN_VTESTCPS256,
25439 IX86_BUILTIN_VTESTNZCPS256,
25440 IX86_BUILTIN_PTESTZ256,
25441 IX86_BUILTIN_PTESTC256,
25442 IX86_BUILTIN_PTESTNZC256,
25444 IX86_BUILTIN_MOVMSKPD256,
25445 IX86_BUILTIN_MOVMSKPS256,
25448 IX86_BUILTIN_MPSADBW256,
25449 IX86_BUILTIN_PABSB256,
25450 IX86_BUILTIN_PABSW256,
25451 IX86_BUILTIN_PABSD256,
25452 IX86_BUILTIN_PACKSSDW256,
25453 IX86_BUILTIN_PACKSSWB256,
25454 IX86_BUILTIN_PACKUSDW256,
25455 IX86_BUILTIN_PACKUSWB256,
25456 IX86_BUILTIN_PADDB256,
25457 IX86_BUILTIN_PADDW256,
25458 IX86_BUILTIN_PADDD256,
25459 IX86_BUILTIN_PADDQ256,
25460 IX86_BUILTIN_PADDSB256,
25461 IX86_BUILTIN_PADDSW256,
25462 IX86_BUILTIN_PADDUSB256,
25463 IX86_BUILTIN_PADDUSW256,
25464 IX86_BUILTIN_PALIGNR256,
25465 IX86_BUILTIN_AND256I,
25466 IX86_BUILTIN_ANDNOT256I,
25467 IX86_BUILTIN_PAVGB256,
25468 IX86_BUILTIN_PAVGW256,
25469 IX86_BUILTIN_PBLENDVB256,
25470 IX86_BUILTIN_PBLENDVW256,
25471 IX86_BUILTIN_PCMPEQB256,
25472 IX86_BUILTIN_PCMPEQW256,
25473 IX86_BUILTIN_PCMPEQD256,
25474 IX86_BUILTIN_PCMPEQQ256,
25475 IX86_BUILTIN_PCMPGTB256,
25476 IX86_BUILTIN_PCMPGTW256,
25477 IX86_BUILTIN_PCMPGTD256,
25478 IX86_BUILTIN_PCMPGTQ256,
25479 IX86_BUILTIN_PHADDW256,
25480 IX86_BUILTIN_PHADDD256,
25481 IX86_BUILTIN_PHADDSW256,
25482 IX86_BUILTIN_PHSUBW256,
25483 IX86_BUILTIN_PHSUBD256,
25484 IX86_BUILTIN_PHSUBSW256,
25485 IX86_BUILTIN_PMADDUBSW256,
25486 IX86_BUILTIN_PMADDWD256,
25487 IX86_BUILTIN_PMAXSB256,
25488 IX86_BUILTIN_PMAXSW256,
25489 IX86_BUILTIN_PMAXSD256,
25490 IX86_BUILTIN_PMAXUB256,
25491 IX86_BUILTIN_PMAXUW256,
25492 IX86_BUILTIN_PMAXUD256,
25493 IX86_BUILTIN_PMINSB256,
25494 IX86_BUILTIN_PMINSW256,
25495 IX86_BUILTIN_PMINSD256,
25496 IX86_BUILTIN_PMINUB256,
25497 IX86_BUILTIN_PMINUW256,
25498 IX86_BUILTIN_PMINUD256,
25499 IX86_BUILTIN_PMOVMSKB256,
25500 IX86_BUILTIN_PMOVSXBW256,
25501 IX86_BUILTIN_PMOVSXBD256,
25502 IX86_BUILTIN_PMOVSXBQ256,
25503 IX86_BUILTIN_PMOVSXWD256,
25504 IX86_BUILTIN_PMOVSXWQ256,
25505 IX86_BUILTIN_PMOVSXDQ256,
25506 IX86_BUILTIN_PMOVZXBW256,
25507 IX86_BUILTIN_PMOVZXBD256,
25508 IX86_BUILTIN_PMOVZXBQ256,
25509 IX86_BUILTIN_PMOVZXWD256,
25510 IX86_BUILTIN_PMOVZXWQ256,
25511 IX86_BUILTIN_PMOVZXDQ256,
25512 IX86_BUILTIN_PMULDQ256,
25513 IX86_BUILTIN_PMULHRSW256,
25514 IX86_BUILTIN_PMULHUW256,
25515 IX86_BUILTIN_PMULHW256,
25516 IX86_BUILTIN_PMULLW256,
25517 IX86_BUILTIN_PMULLD256,
25518 IX86_BUILTIN_PMULUDQ256,
25519 IX86_BUILTIN_POR256,
25520 IX86_BUILTIN_PSADBW256,
25521 IX86_BUILTIN_PSHUFB256,
25522 IX86_BUILTIN_PSHUFD256,
25523 IX86_BUILTIN_PSHUFHW256,
25524 IX86_BUILTIN_PSHUFLW256,
25525 IX86_BUILTIN_PSIGNB256,
25526 IX86_BUILTIN_PSIGNW256,
25527 IX86_BUILTIN_PSIGND256,
25528 IX86_BUILTIN_PSLLDQI256,
25529 IX86_BUILTIN_PSLLWI256,
25530 IX86_BUILTIN_PSLLW256,
25531 IX86_BUILTIN_PSLLDI256,
25532 IX86_BUILTIN_PSLLD256,
25533 IX86_BUILTIN_PSLLQI256,
25534 IX86_BUILTIN_PSLLQ256,
25535 IX86_BUILTIN_PSRAWI256,
25536 IX86_BUILTIN_PSRAW256,
25537 IX86_BUILTIN_PSRADI256,
25538 IX86_BUILTIN_PSRAD256,
25539 IX86_BUILTIN_PSRLDQI256,
25540 IX86_BUILTIN_PSRLWI256,
25541 IX86_BUILTIN_PSRLW256,
25542 IX86_BUILTIN_PSRLDI256,
25543 IX86_BUILTIN_PSRLD256,
25544 IX86_BUILTIN_PSRLQI256,
25545 IX86_BUILTIN_PSRLQ256,
25546 IX86_BUILTIN_PSUBB256,
25547 IX86_BUILTIN_PSUBW256,
25548 IX86_BUILTIN_PSUBD256,
25549 IX86_BUILTIN_PSUBQ256,
25550 IX86_BUILTIN_PSUBSB256,
25551 IX86_BUILTIN_PSUBSW256,
25552 IX86_BUILTIN_PSUBUSB256,
25553 IX86_BUILTIN_PSUBUSW256,
25554 IX86_BUILTIN_PUNPCKHBW256,
25555 IX86_BUILTIN_PUNPCKHWD256,
25556 IX86_BUILTIN_PUNPCKHDQ256,
25557 IX86_BUILTIN_PUNPCKHQDQ256,
25558 IX86_BUILTIN_PUNPCKLBW256,
25559 IX86_BUILTIN_PUNPCKLWD256,
25560 IX86_BUILTIN_PUNPCKLDQ256,
25561 IX86_BUILTIN_PUNPCKLQDQ256,
25562 IX86_BUILTIN_PXOR256,
25563 IX86_BUILTIN_MOVNTDQA256,
25564 IX86_BUILTIN_VBROADCASTSS_PS,
25565 IX86_BUILTIN_VBROADCASTSS_PS256,
25566 IX86_BUILTIN_VBROADCASTSD_PD256,
25567 IX86_BUILTIN_VBROADCASTSI256,
25568 IX86_BUILTIN_PBLENDD256,
25569 IX86_BUILTIN_PBLENDD128,
25570 IX86_BUILTIN_PBROADCASTB256,
25571 IX86_BUILTIN_PBROADCASTW256,
25572 IX86_BUILTIN_PBROADCASTD256,
25573 IX86_BUILTIN_PBROADCASTQ256,
25574 IX86_BUILTIN_PBROADCASTB128,
25575 IX86_BUILTIN_PBROADCASTW128,
25576 IX86_BUILTIN_PBROADCASTD128,
25577 IX86_BUILTIN_PBROADCASTQ128,
25578 IX86_BUILTIN_VPERMVARSI256,
25579 IX86_BUILTIN_VPERMDF256,
25580 IX86_BUILTIN_VPERMVARSF256,
25581 IX86_BUILTIN_VPERMDI256,
25582 IX86_BUILTIN_VPERMTI256,
25583 IX86_BUILTIN_VEXTRACT128I256,
25584 IX86_BUILTIN_VINSERT128I256,
25585 IX86_BUILTIN_MASKLOADD,
25586 IX86_BUILTIN_MASKLOADQ,
25587 IX86_BUILTIN_MASKLOADD256,
25588 IX86_BUILTIN_MASKLOADQ256,
25589 IX86_BUILTIN_MASKSTORED,
25590 IX86_BUILTIN_MASKSTOREQ,
25591 IX86_BUILTIN_MASKSTORED256,
25592 IX86_BUILTIN_MASKSTOREQ256,
25593 IX86_BUILTIN_PSLLVV4DI,
25594 IX86_BUILTIN_PSLLVV2DI,
25595 IX86_BUILTIN_PSLLVV8SI,
25596 IX86_BUILTIN_PSLLVV4SI,
25597 IX86_BUILTIN_PSRAVV8SI,
25598 IX86_BUILTIN_PSRAVV4SI,
25599 IX86_BUILTIN_PSRLVV4DI,
25600 IX86_BUILTIN_PSRLVV2DI,
25601 IX86_BUILTIN_PSRLVV8SI,
25602 IX86_BUILTIN_PSRLVV4SI,
25604 IX86_BUILTIN_GATHERSIV2DF,
25605 IX86_BUILTIN_GATHERSIV4DF,
25606 IX86_BUILTIN_GATHERDIV2DF,
25607 IX86_BUILTIN_GATHERDIV4DF,
25608 IX86_BUILTIN_GATHERSIV4SF,
25609 IX86_BUILTIN_GATHERSIV8SF,
25610 IX86_BUILTIN_GATHERDIV4SF,
25611 IX86_BUILTIN_GATHERDIV8SF,
25612 IX86_BUILTIN_GATHERSIV2DI,
25613 IX86_BUILTIN_GATHERSIV4DI,
25614 IX86_BUILTIN_GATHERDIV2DI,
25615 IX86_BUILTIN_GATHERDIV4DI,
25616 IX86_BUILTIN_GATHERSIV4SI,
25617 IX86_BUILTIN_GATHERSIV8SI,
25618 IX86_BUILTIN_GATHERDIV4SI,
25619 IX86_BUILTIN_GATHERDIV8SI,
25621 /* Alternate 4 element gather for the vectorizer where
25622 all operands are 32-byte wide. */
25623 IX86_BUILTIN_GATHERALTSIV4DF,
25624 IX86_BUILTIN_GATHERALTDIV8SF,
25625 IX86_BUILTIN_GATHERALTSIV4DI,
25626 IX86_BUILTIN_GATHERALTDIV8SI,
25628 /* TFmode support builtins. */
25630 IX86_BUILTIN_HUGE_VALQ,
25631 IX86_BUILTIN_FABSQ,
25632 IX86_BUILTIN_COPYSIGNQ,
25634 /* Vectorizer support builtins. */
25635 IX86_BUILTIN_CPYSGNPS,
25636 IX86_BUILTIN_CPYSGNPD,
25637 IX86_BUILTIN_CPYSGNPS256,
25638 IX86_BUILTIN_CPYSGNPD256,
25640 /* FMA4 instructions. */
25641 IX86_BUILTIN_VFMADDSS,
25642 IX86_BUILTIN_VFMADDSD,
25643 IX86_BUILTIN_VFMADDPS,
25644 IX86_BUILTIN_VFMADDPD,
25645 IX86_BUILTIN_VFMADDPS256,
25646 IX86_BUILTIN_VFMADDPD256,
25647 IX86_BUILTIN_VFMADDSUBPS,
25648 IX86_BUILTIN_VFMADDSUBPD,
25649 IX86_BUILTIN_VFMADDSUBPS256,
25650 IX86_BUILTIN_VFMADDSUBPD256,
25652 /* FMA3 instructions. */
25653 IX86_BUILTIN_VFMADDSS3,
25654 IX86_BUILTIN_VFMADDSD3,
25656 /* XOP instructions. */
25657 IX86_BUILTIN_VPCMOV,
25658 IX86_BUILTIN_VPCMOV_V2DI,
25659 IX86_BUILTIN_VPCMOV_V4SI,
25660 IX86_BUILTIN_VPCMOV_V8HI,
25661 IX86_BUILTIN_VPCMOV_V16QI,
25662 IX86_BUILTIN_VPCMOV_V4SF,
25663 IX86_BUILTIN_VPCMOV_V2DF,
25664 IX86_BUILTIN_VPCMOV256,
25665 IX86_BUILTIN_VPCMOV_V4DI256,
25666 IX86_BUILTIN_VPCMOV_V8SI256,
25667 IX86_BUILTIN_VPCMOV_V16HI256,
25668 IX86_BUILTIN_VPCMOV_V32QI256,
25669 IX86_BUILTIN_VPCMOV_V8SF256,
25670 IX86_BUILTIN_VPCMOV_V4DF256,
25672 IX86_BUILTIN_VPPERM,
25674 IX86_BUILTIN_VPMACSSWW,
25675 IX86_BUILTIN_VPMACSWW,
25676 IX86_BUILTIN_VPMACSSWD,
25677 IX86_BUILTIN_VPMACSWD,
25678 IX86_BUILTIN_VPMACSSDD,
25679 IX86_BUILTIN_VPMACSDD,
25680 IX86_BUILTIN_VPMACSSDQL,
25681 IX86_BUILTIN_VPMACSSDQH,
25682 IX86_BUILTIN_VPMACSDQL,
25683 IX86_BUILTIN_VPMACSDQH,
25684 IX86_BUILTIN_VPMADCSSWD,
25685 IX86_BUILTIN_VPMADCSWD,
25687 IX86_BUILTIN_VPHADDBW,
25688 IX86_BUILTIN_VPHADDBD,
25689 IX86_BUILTIN_VPHADDBQ,
25690 IX86_BUILTIN_VPHADDWD,
25691 IX86_BUILTIN_VPHADDWQ,
25692 IX86_BUILTIN_VPHADDDQ,
25693 IX86_BUILTIN_VPHADDUBW,
25694 IX86_BUILTIN_VPHADDUBD,
25695 IX86_BUILTIN_VPHADDUBQ,
25696 IX86_BUILTIN_VPHADDUWD,
25697 IX86_BUILTIN_VPHADDUWQ,
25698 IX86_BUILTIN_VPHADDUDQ,
25699 IX86_BUILTIN_VPHSUBBW,
25700 IX86_BUILTIN_VPHSUBWD,
25701 IX86_BUILTIN_VPHSUBDQ,
25703 IX86_BUILTIN_VPROTB,
25704 IX86_BUILTIN_VPROTW,
25705 IX86_BUILTIN_VPROTD,
25706 IX86_BUILTIN_VPROTQ,
25707 IX86_BUILTIN_VPROTB_IMM,
25708 IX86_BUILTIN_VPROTW_IMM,
25709 IX86_BUILTIN_VPROTD_IMM,
25710 IX86_BUILTIN_VPROTQ_IMM,
25712 IX86_BUILTIN_VPSHLB,
25713 IX86_BUILTIN_VPSHLW,
25714 IX86_BUILTIN_VPSHLD,
25715 IX86_BUILTIN_VPSHLQ,
25716 IX86_BUILTIN_VPSHAB,
25717 IX86_BUILTIN_VPSHAW,
25718 IX86_BUILTIN_VPSHAD,
25719 IX86_BUILTIN_VPSHAQ,
25721 IX86_BUILTIN_VFRCZSS,
25722 IX86_BUILTIN_VFRCZSD,
25723 IX86_BUILTIN_VFRCZPS,
25724 IX86_BUILTIN_VFRCZPD,
25725 IX86_BUILTIN_VFRCZPS256,
25726 IX86_BUILTIN_VFRCZPD256,
25728 IX86_BUILTIN_VPCOMEQUB,
25729 IX86_BUILTIN_VPCOMNEUB,
25730 IX86_BUILTIN_VPCOMLTUB,
25731 IX86_BUILTIN_VPCOMLEUB,
25732 IX86_BUILTIN_VPCOMGTUB,
25733 IX86_BUILTIN_VPCOMGEUB,
25734 IX86_BUILTIN_VPCOMFALSEUB,
25735 IX86_BUILTIN_VPCOMTRUEUB,
25737 IX86_BUILTIN_VPCOMEQUW,
25738 IX86_BUILTIN_VPCOMNEUW,
25739 IX86_BUILTIN_VPCOMLTUW,
25740 IX86_BUILTIN_VPCOMLEUW,
25741 IX86_BUILTIN_VPCOMGTUW,
25742 IX86_BUILTIN_VPCOMGEUW,
25743 IX86_BUILTIN_VPCOMFALSEUW,
25744 IX86_BUILTIN_VPCOMTRUEUW,
25746 IX86_BUILTIN_VPCOMEQUD,
25747 IX86_BUILTIN_VPCOMNEUD,
25748 IX86_BUILTIN_VPCOMLTUD,
25749 IX86_BUILTIN_VPCOMLEUD,
25750 IX86_BUILTIN_VPCOMGTUD,
25751 IX86_BUILTIN_VPCOMGEUD,
25752 IX86_BUILTIN_VPCOMFALSEUD,
25753 IX86_BUILTIN_VPCOMTRUEUD,
25755 IX86_BUILTIN_VPCOMEQUQ,
25756 IX86_BUILTIN_VPCOMNEUQ,
25757 IX86_BUILTIN_VPCOMLTUQ,
25758 IX86_BUILTIN_VPCOMLEUQ,
25759 IX86_BUILTIN_VPCOMGTUQ,
25760 IX86_BUILTIN_VPCOMGEUQ,
25761 IX86_BUILTIN_VPCOMFALSEUQ,
25762 IX86_BUILTIN_VPCOMTRUEUQ,
25764 IX86_BUILTIN_VPCOMEQB,
25765 IX86_BUILTIN_VPCOMNEB,
25766 IX86_BUILTIN_VPCOMLTB,
25767 IX86_BUILTIN_VPCOMLEB,
25768 IX86_BUILTIN_VPCOMGTB,
25769 IX86_BUILTIN_VPCOMGEB,
25770 IX86_BUILTIN_VPCOMFALSEB,
25771 IX86_BUILTIN_VPCOMTRUEB,
25773 IX86_BUILTIN_VPCOMEQW,
25774 IX86_BUILTIN_VPCOMNEW,
25775 IX86_BUILTIN_VPCOMLTW,
25776 IX86_BUILTIN_VPCOMLEW,
25777 IX86_BUILTIN_VPCOMGTW,
25778 IX86_BUILTIN_VPCOMGEW,
25779 IX86_BUILTIN_VPCOMFALSEW,
25780 IX86_BUILTIN_VPCOMTRUEW,
25782 IX86_BUILTIN_VPCOMEQD,
25783 IX86_BUILTIN_VPCOMNED,
25784 IX86_BUILTIN_VPCOMLTD,
25785 IX86_BUILTIN_VPCOMLED,
25786 IX86_BUILTIN_VPCOMGTD,
25787 IX86_BUILTIN_VPCOMGED,
25788 IX86_BUILTIN_VPCOMFALSED,
25789 IX86_BUILTIN_VPCOMTRUED,
25791 IX86_BUILTIN_VPCOMEQQ,
25792 IX86_BUILTIN_VPCOMNEQ,
25793 IX86_BUILTIN_VPCOMLTQ,
25794 IX86_BUILTIN_VPCOMLEQ,
25795 IX86_BUILTIN_VPCOMGTQ,
25796 IX86_BUILTIN_VPCOMGEQ,
25797 IX86_BUILTIN_VPCOMFALSEQ,
25798 IX86_BUILTIN_VPCOMTRUEQ,
25800 /* LWP instructions. */
25801 IX86_BUILTIN_LLWPCB,
25802 IX86_BUILTIN_SLWPCB,
25803 IX86_BUILTIN_LWPVAL32,
25804 IX86_BUILTIN_LWPVAL64,
25805 IX86_BUILTIN_LWPINS32,
25806 IX86_BUILTIN_LWPINS64,
25810 /* BMI instructions. */
25811 IX86_BUILTIN_BEXTR32,
25812 IX86_BUILTIN_BEXTR64,
25815 /* TBM instructions. */
25816 IX86_BUILTIN_BEXTRI32,
25817 IX86_BUILTIN_BEXTRI64,
25819 /* BMI2 instructions. */
25820 IX86_BUILTIN_BZHI32,
25821 IX86_BUILTIN_BZHI64,
25822 IX86_BUILTIN_PDEP32,
25823 IX86_BUILTIN_PDEP64,
25824 IX86_BUILTIN_PEXT32,
25825 IX86_BUILTIN_PEXT64,
25827 /* FSGSBASE instructions. */
25828 IX86_BUILTIN_RDFSBASE32,
25829 IX86_BUILTIN_RDFSBASE64,
25830 IX86_BUILTIN_RDGSBASE32,
25831 IX86_BUILTIN_RDGSBASE64,
25832 IX86_BUILTIN_WRFSBASE32,
25833 IX86_BUILTIN_WRFSBASE64,
25834 IX86_BUILTIN_WRGSBASE32,
25835 IX86_BUILTIN_WRGSBASE64,
25837 /* RDRND instructions. */
25838 IX86_BUILTIN_RDRAND16_STEP,
25839 IX86_BUILTIN_RDRAND32_STEP,
25840 IX86_BUILTIN_RDRAND64_STEP,
25842 /* F16C instructions. */
25843 IX86_BUILTIN_CVTPH2PS,
25844 IX86_BUILTIN_CVTPH2PS256,
25845 IX86_BUILTIN_CVTPS2PH,
25846 IX86_BUILTIN_CVTPS2PH256,
  /* CFString built-in for Darwin.  */
  IX86_BUILTIN_CFSTRING,

  IX86_BUILTIN_MAX
};
25854 /* Table for the ix86 builtin decls. */
25855 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
25861 const char *name; /* function name */
25862 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
25863 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
25864 bool const_p; /* true if the declaration is constant */
  bool set_and_not_built_p;	/* true if the builtin was recorded but its
				   decl has not been built yet */
};
25868 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
25871 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
25872 of which isa_flags to use in the ix86_builtins_isa array. Stores the
25873 function decl in the ix86_builtins array. Returns the function decl or
25874 NULL_TREE, if the builtin was not added.
25876 If the front end has a special hook for builtin functions, delay adding
25877 builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.
   If the front end doesn't have a special hook, record all builtins, even if
   they aren't in the current ISA, in case the user uses function specific
   options for a different ISA, so that we don't get scope errors if a
   builtin is added in the middle of a function scope.  */
static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
25889 enum ix86_builtin_func_type tcode,
25890 enum ix86_builtins code)
{
  tree decl = NULL_TREE;
  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
25901 || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
25905 tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
25908 ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
25913 ix86_builtins[(int) code] = NULL_TREE;
25914 ix86_builtins_isa[(int) code].tcode = tcode;
25915 ix86_builtins_isa[(int) code].name = name;
25916 ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
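/* Usage sketch (added commentary; the actual registrations appear further
   down in this file): a builtin is recorded with its ISA mask, name,
   function type code and enum value, e.g.

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
			V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);

   If the ISA is not enabled when this runs, only the ix86_builtins_isa
   record is filled in; the decl is built later by ix86_add_new_builtins.  */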
25924 /* Like def_builtin, but also marks the function decl "const". */
static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
25939 /* Add any new builtin functions for a given ISA that may not have been
25940 declared. This saves a bit of space compared to adding all of the
25941 declarations to the tree, even if we didn't use them. */
static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
25950 if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
25956 ix86_builtins_isa[i].set_and_not_built_p = false;
25958 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
25959 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
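/* Note (added for clarity): this is expected to run when the set of enabled
   ISAs grows after the builtins were first recorded, e.g. through the
   `target' attribute or pragma, so builtins deferred by def_builtin become
   visible without having been declared up front in every scope.  */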
25970 /* Bits for builtin_description.flag. */
/* Set when we don't support the comparison natively, and should
   swap the comparison operands in order to support it.  */
25974 #define BUILTIN_DESC_SWAP_OPERANDS 1
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
static const struct builtin_description bdesc_comi[] =
{
25988 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
25989 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
25990 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
25991 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
25992 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
25993 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
25994 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
25995 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
25996 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
25997 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
25998 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
25999 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
26000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
26001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
26002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
26003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
26004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
26005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
26006 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
26007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
26008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
26009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
26010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
26017 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
26018 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
26019 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
26020 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
26021 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
26022 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
26029 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
26030 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
26031 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
26032 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
26033 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
26034 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
/* Special builtins with a variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
26041 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
26042 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
26043 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
26046 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
26049 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
26052 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
26053 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
26054 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
26056 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
26057 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
26058 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
26059 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
26061 /* SSE or 3DNow!A */
26062 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26063 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
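
/* For reference, a minimal sketch of how these tables are walked when
   builtins are registered.  The real loops live in
   ix86_init_mmx_sse_builtins later in this file; this assumes only
   the def_builtin helper defined earlier and is illustrative, not the
   exact code:

     const struct builtin_description *d;
     size_t i;

     for (i = 0, d = bdesc_special_args;
          i < ARRAY_SIZE (bdesc_special_args);
          i++, d++)
       if (d->name)
         def_builtin (d->mask, d->name,
                      (enum ix86_builtin_func_type) d->flag, d->code);

   def_builtin only exposes the name when the mask matches the enabled
   ISA: a mask containing OPTION_MASK_ISA_64BIT restricts the builtin
   to TARGET_64BIT, while the ~OPTION_MASK_ISA_64BIT masks used by
   rdtsc and friends mark builtins available in both modes regardless
   of the other ISA flags.  */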

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
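
  /* The _COUNT suffixes above distinguish the two hardware forms of
     the MMX shifts: the SI_COUNT prototypes take the shift count as
     an integer, e.g. __builtin_ia32_psllwi (v, 3), while the
     V4HI/V2SI/V1DI_COUNT prototypes take it in a vector register
     operand, as psllw and friends do.  The expander uses the _COUNT
     marker to copy or widen the count into the mode the insn pattern
     expects.  */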

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
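
  /* Note how the compare entries work: they all expand through the
     same sse_maskcmpv4sf3 (or sse_vmmaskcmpv4sf3) pattern, with the
     rtx code in the fifth field supplying the condition.  SSE has no
     native greater-than compare, so the _SWAP prototypes tell the
     expander to swap the two operands first: cmpgtps is LT with
     swapped operands, and cmpngtps likewise reuses UNGE.  */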

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26535 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
26536 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
26537 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
26538 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
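
  /* SSSE3.  */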
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
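
  /* SSE4.1.  */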
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
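
  /* Illustrative sketch, not part of the table: the four ROUND_* rows per
     mode above reuse one sse4_1_round* expander with a fixed rounding
     immediate.  Assuming -msse4.1 and GCC's generic vector extension, a
     direct call such as

	typedef double v2df __attribute__ ((vector_size (16)));

	v2df
	floor_v2df (v2df x)
	{
	  return __builtin_ia32_roundpd (x, 1);
	}

     (1 being the ROUND_FLOOR encoding) emits the same roundpd instruction
     that __builtin_ia32_floorpd expands to.  */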
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
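
  /* A hedged sketch of how the three ptest rows above differ: they share
     the sse4_1_ptest pattern, and the rtx_code field (EQ, LTU, GTU) selects
     which EFLAGS bit the builtin's result is read from.  Assuming -msse4.1:

	typedef long long v2di __attribute__ ((vector_size (16)));

	int
	all_masked_zero (v2di a, v2di b)
	{
	  return __builtin_ia32_ptestz128 (a, b);
	}

     returns the ZF result ((a & b) == 0), while ptestc128 reads CF
     ((~a & b) == 0) and ptestnzc128 tests that neither flag is set.  */

  /* SSE4.2.  */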
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
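
  /* Illustrative sketch, not part of the table: the crc32 rows accumulate
     a CRC-32C value one chunk at a time.  Assuming -msse4.2:

	unsigned int
	crc32c_bytes (unsigned int crc, const unsigned char *p, unsigned int n)
	{
	  while (n--)
	    crc = __builtin_ia32_crc32qi (crc, *p++);
	  return crc;
	}
  */

  /* SSE4A.  */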
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
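
  /* AES.  */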
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
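
  /* PCLMUL.  */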
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
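
  /* AVX.  */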
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
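
  /* Illustrative sketch, not part of the table: the vextractf128 rows take
     a 256-bit source and a lane immediate.  Assuming -mavx:

	typedef float v8sf __attribute__ ((vector_size (32)));
	typedef float v4sf __attribute__ ((vector_size (16)));

	v4sf
	high_half (v8sf x)
	{
	  return __builtin_ia32_vextractf128_ps256 (x, 1);
	}

     returns the upper 128 bits of X; an immediate of 0 selects the lower
     half.  */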
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
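
  /* AVX2.  */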
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
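
  /* A hedged sketch of the variable-shift rows above: unlike the *_COUNT
     forms, which shift every element by one shared count, psllv/psrav/psrlv
     shift each element by its own per-element count.  Assuming -mavx2:

	typedef int v4si __attribute__ ((vector_size (16)));

	v4si
	shl_each (v4si x, v4si n)
	{
	  return __builtin_ia32_psllv4si (x, n);
	}
  */

  /* LZCNT.  */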
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
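
  /* BMI.  */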
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
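
  /* TBM.  */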
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
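
  /* F16C.  */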
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
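
  /* BMI2.  */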
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};
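
/* Illustrative sketch, not part of the tables: the BMI2 pdep/pext rows wire
   up bit deposit and bit extract.  Assuming -mbmi2:

	unsigned int
	gather_bits (unsigned int x, unsigned int mask)
	{
	  return __builtin_ia32_pext_si (x, mask);
	}

   packs the bits of X selected by MASK into the low-order bits of the
   result; __builtin_ia32_pdep_si performs the inverse scatter.  */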
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
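
/* A minimal sketch (assuming the surrounding file's def_builtin_const
   helper) of how the records below are consumed at initialization time:
   every entry whose ISA mask is enabled is registered under its name with
   the function type selected by its flag field, roughly

	for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg);
	     i++, d++)
	  if (d->name != 0)
	    def_builtin_const (d->mask, d->name,
			       (enum ix86_builtin_func_type) d->flag, d->code);

   while the comparison field is only consulted later, when a call to the
   builtin is expanded to RTL.  */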
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
	}
    }
  return NULL_TREE;
}

/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
	}
    }
  return NULL_TREE;
}
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type, attrs, attrs_type;
	  enum built_in_function code = (enum built_in_function) d->code;

	  ftype = (enum ix86_builtin_func_type) d->flag;
	  type = ix86_get_builtin_func_type (ftype);

	  if (BUILTIN_TM_LOAD_P (code))
	    {
	      attrs = attrs_load;
	      attrs_type = attrs_type_load;
	    }
	  else if (BUILTIN_TM_STORE_P (code))
	    {
	      attrs = attrs_store;
	      attrs_type = attrs_type_store;
	    }
	  else
	    {
	      attrs = attrs_log;
	      attrs_type = attrs_type_log;
	    }
	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
				       /* The builtin without the prefix for
					  calling it directly.  */
				       d->name + strlen ("__builtin_"),
				       attrs);
	  /* add_builtin_function() will set the DECL_ATTRIBUTES, now
	     set the TYPE_ATTRIBUTES.  */
	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

	  set_builtin_decl (code, decl, false);
	}
    }
}
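/* For illustration: each bdesc_tm entry is registered under its
   "__builtin_" name and, through the d->name + strlen ("__builtin_")
   argument above, under the bare libitm symbol as well, so
   "__builtin__ITM_RM128" is also reachable as _ITM_RM128.  A minimal
   sketch, assuming -fgnu-tm plus SSE ("tm_load_example" is a
   hypothetical name):

     __v4sf
     tm_load_example (const __v4sf *p)
     {
       return __builtin__ITM_RM128 (p);
     }
*/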
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, to allow the user to compile particular
   modules with different target-specific options that differ from the
   command line options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with a variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with a variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* RDRND */
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);
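/* Illustrative use of the RDRND step builtins defined above: each call
   stores a hardware random number through the pointer and returns
   nonzero on success, so callers are expected to retry on failure.
   A minimal sketch, assuming -mrdrnd ("rdrand_example" is a
   hypothetical name):

     unsigned int
     rdrand_example (void)
     {
       unsigned int v = 0;
       while (!__builtin_ia32_rdrand32_step (&v))
	 continue;
       return v;
     }
*/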
  /* AVX2 */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
	       IX86_BUILTIN_GATHERSIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
	       IX86_BUILTIN_GATHERSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
	       IX86_BUILTIN_GATHERDIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
	       IX86_BUILTIN_GATHERDIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
	       IX86_BUILTIN_GATHERSIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
	       IX86_BUILTIN_GATHERSIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
	       IX86_BUILTIN_GATHERSIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
	       IX86_BUILTIN_GATHERSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
	       IX86_BUILTIN_GATHERDIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
	       IX86_BUILTIN_GATHERDIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
	       IX86_BUILTIN_GATHERSIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
	       V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
	       IX86_BUILTIN_GATHERSIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
	       V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
	       IX86_BUILTIN_GATHERALTSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
	       IX86_BUILTIN_GATHERALTDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
	       IX86_BUILTIN_GATHERALTSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
	       V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
	       IX86_BUILTIN_GATHERALTDIV8SI);
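/* The gather builtins above all share one shape: (source/merge vector,
   base pointer, index vector, mask vector, scale), where the scale must
   be an immediate 1, 2, 4 or 8 and only lanes whose mask element has
   its most significant bit set are loaded (-1.0 qualifies, since its
   sign bit is set).  A minimal sketch, assuming -mavx2
   ("gather_example" is a hypothetical name):

     __v4df
     gather_example (const double *base, __v4si idx)
     {
       __v4df src = { 0.0, 0.0, 0.0, 0.0 };
       __v4df mask = { -1.0, -1.0, -1.0, -1.0 };
       return __builtin_ia32_gathersiv4df (src, base, idx, mask, 8);
     }
*/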
  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
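/* Illustrative use of the vec_ext/vec_set builtins registered above;
   the element number must be an immediate.  A minimal sketch, assuming
   -msse4.1 ("vec_ops_example" is a hypothetical name):

     float
     vec_ops_example (__v4sf v)
     {
       v = __builtin_ia32_vec_set_v4sf (v, 1.0f, 2);
       return __builtin_ia32_vec_ext_v4sf (v, 2);
     }
*/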
  /* Add FMA4 and XOP multi-arg instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
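/* Illustrative use of the ABI-specific varargs builtins registered
   above, which let 64-bit code take a variable argument list in the
   "other" calling convention.  A minimal sketch for a 64-bit target
   ("ms_sum_example" is a hypothetical name):

     int __attribute__ ((ms_abi))
     ms_sum_example (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, sum = 0;

       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
	 sum += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return sum;
     }
*/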
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
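/* The two types registered above are directly usable from C.  A minimal
   sketch ("wide_example" is a hypothetical name):

     __float128
     wide_example (__float80 x)
     {
       return (__float128) x * 2;
     }
*/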
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to a normal call if SSE2 isn't available, since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
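  /* Illustrative use of the TFmode builtins above; without SSE2 the
     calls fall back to the __fabstf2/__copysigntf3 routines in libgcc.
     A minimal sketch ("fabsq_example" is a hypothetical name):

       __float128
       fabsq_example (__float128 x, __float128 sign)
       {
	 return __builtin_copysignq (__builtin_fabsq (x), sign);
       }
  */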
  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  if (TARGET_LP64)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
	    {
	      enum insn_code new_icode = icode;
	      switch (icode)
		{
		case CODE_FOR_xop_vpermil2v2df3:
		case CODE_FOR_xop_vpermil2v4sf3:
		case CODE_FOR_xop_vpermil2v4df3:
		case CODE_FOR_xop_vpermil2v8sf3:
		  error ("the last argument must be a 2-bit immediate");
		  return gen_reg_rtx (tmode);
		case CODE_FOR_xop_rotlv2di3:
		  new_icode = CODE_FOR_rotlv2di3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv4si3:
		  new_icode = CODE_FOR_rotlv4si3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv8hi3:
		  new_icode = CODE_FOR_rotlv8hi3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv16qi3:
		  new_icode = CODE_FOR_rotlv16qi3;
		xop_rotl:
		  if (CONST_INT_P (op))
		    {
		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
		      op = GEN_INT (INTVAL (op) & mask);
		      gcc_checking_assert
			(insn_data[icode].operand[i + 1].predicate (op, mode));
		    }
		  else
		    {
		      gcc_checking_assert
			(nargs == 2
			 && insn_data[new_icode].operand[0].mode == tmode
			 && insn_data[new_icode].operand[1].mode == tmode
			 && insn_data[new_icode].operand[2].mode == mode
			 && insn_data[new_icode].operand[0].predicate
			    == insn_data[icode].operand[0].predicate
			 && insn_data[new_icode].operand[1].predicate
			    == insn_data[icode].operand[1].predicate);
		      icode = new_icode;
		      goto non_constant;
		    }
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else
	{
	non_constant:
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
				     tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
28331 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
28334 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
28335 tree exp, rtx target)
28338 tree arg0 = CALL_EXPR_ARG (exp, 0);
28339 tree arg1 = CALL_EXPR_ARG (exp, 1);
28340 tree arg2 = CALL_EXPR_ARG (exp, 2);
28341 tree arg3 = CALL_EXPR_ARG (exp, 3);
28342 tree arg4 = CALL_EXPR_ARG (exp, 4);
28343 rtx scratch0, scratch1;
28344 rtx op0 = expand_normal (arg0);
28345 rtx op1 = expand_normal (arg1);
28346 rtx op2 = expand_normal (arg2);
28347 rtx op3 = expand_normal (arg3);
28348 rtx op4 = expand_normal (arg4);
28349 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
28351 tmode0 = insn_data[d->icode].operand[0].mode;
28352 tmode1 = insn_data[d->icode].operand[1].mode;
28353 modev2 = insn_data[d->icode].operand[2].mode;
28354 modei3 = insn_data[d->icode].operand[3].mode;
28355 modev4 = insn_data[d->icode].operand[4].mode;
28356 modei5 = insn_data[d->icode].operand[5].mode;
28357 modeimm = insn_data[d->icode].operand[6].mode;
28359 if (VECTOR_MODE_P (modev2))
28360 op0 = safe_vector_operand (op0, modev2);
28361 if (VECTOR_MODE_P (modev4))
28362 op2 = safe_vector_operand (op2, modev4);
28364 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
28365 op0 = copy_to_mode_reg (modev2, op0);
28366 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
28367 op1 = copy_to_mode_reg (modei3, op1);
28368 if ((optimize && !register_operand (op2, modev4))
28369 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
28370 op2 = copy_to_mode_reg (modev4, op2);
28371 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
28372 op3 = copy_to_mode_reg (modei5, op3);
28374 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
28376 error ("the fifth argument must be an 8-bit immediate");
28380 if (d->code == IX86_BUILTIN_PCMPESTRI128)
28382 if (optimize || !target
28383 || GET_MODE (target) != tmode0
28384 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
28385 target = gen_reg_rtx (tmode0);
28387 scratch1 = gen_reg_rtx (tmode1);
28389 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
28391 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
28393 if (optimize || !target
28394 || GET_MODE (target) != tmode1
28395 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
28396 target = gen_reg_rtx (tmode1);
28398 scratch0 = gen_reg_rtx (tmode0);
28400 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
28404 gcc_assert (d->flag);
28406 scratch0 = gen_reg_rtx (tmode0);
28407 scratch1 = gen_reg_rtx (tmode1);
28409 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
28419 target = gen_reg_rtx (SImode);
28420 emit_move_insn (target, const0_rtx);
28421 target = gen_rtx_SUBREG (QImode, target, 0);
28424 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28425 gen_rtx_fmt_ee (EQ, QImode,
28426 gen_rtx_REG ((enum machine_mode) d->flag,
28429 return SUBREG_REG (target);
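/* Illustrative sketch (not part of the original source): the
   explicit-length string-compare builtins take two vectors, their
   lengths and an 8-bit mode immediate, e.g.

     #include <smmintrin.h>

     int
     find_first (__m128i a, int la, __m128i b, int lb)
     {
       return _mm_cmpestri (a, la, b, lb, _SIDD_CMP_EQUAL_ORDERED);
     }

   The index form returns the ECX operand, the mask form returns the
   XMM operand, and the flag forms test a flags-register bit via the
   EQ comparison emitted above. */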
28436 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
28439 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
28440 tree exp, rtx target)
28443 tree arg0 = CALL_EXPR_ARG (exp, 0);
28444 tree arg1 = CALL_EXPR_ARG (exp, 1);
28445 tree arg2 = CALL_EXPR_ARG (exp, 2);
28446 rtx scratch0, scratch1;
28447 rtx op0 = expand_normal (arg0);
28448 rtx op1 = expand_normal (arg1);
28449 rtx op2 = expand_normal (arg2);
28450 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
28452 tmode0 = insn_data[d->icode].operand[0].mode;
28453 tmode1 = insn_data[d->icode].operand[1].mode;
28454 modev2 = insn_data[d->icode].operand[2].mode;
28455 modev3 = insn_data[d->icode].operand[3].mode;
28456 modeimm = insn_data[d->icode].operand[4].mode;
28458 if (VECTOR_MODE_P (modev2))
28459 op0 = safe_vector_operand (op0, modev2);
28460 if (VECTOR_MODE_P (modev3))
28461 op1 = safe_vector_operand (op1, modev3);
28463 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
28464 op0 = copy_to_mode_reg (modev2, op0);
28465 if ((optimize && !register_operand (op1, modev3))
28466 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
28467 op1 = copy_to_mode_reg (modev3, op1);
28469 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
28471 error ("the third argument must be an 8-bit immediate");
28475 if (d->code == IX86_BUILTIN_PCMPISTRI128)
28477 if (optimize || !target
28478 || GET_MODE (target) != tmode0
28479 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
28480 target = gen_reg_rtx (tmode0);
28482 scratch1 = gen_reg_rtx (tmode1);
28484 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
28486 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
28488 if (optimize || !target
28489 || GET_MODE (target) != tmode1
28490 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
28491 target = gen_reg_rtx (tmode1);
28493 scratch0 = gen_reg_rtx (tmode0);
28495 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
28499 gcc_assert (d->flag);
28501 scratch0 = gen_reg_rtx (tmode0);
28502 scratch1 = gen_reg_rtx (tmode1);
28504 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
28514 target = gen_reg_rtx (SImode);
28515 emit_move_insn (target, const0_rtx);
28516 target = gen_rtx_SUBREG (QImode, target, 0);
28519 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28520 gen_rtx_fmt_ee (EQ, QImode,
28521 gen_rtx_REG ((enum machine_mode) d->flag,
28524 return SUBREG_REG (target);
28530 /* Subroutine of ix86_expand_builtin to take care of insns with
28531 a variable number of operands. */
28534 ix86_expand_args_builtin (const struct builtin_description *d,
28535 tree exp, rtx target)
28537 rtx pat, real_target;
28538 unsigned int i, nargs;
28539 unsigned int nargs_constant = 0;
28540 int num_memory = 0;
28544 enum machine_mode mode;
28546 bool last_arg_count = false;
28547 enum insn_code icode = d->icode;
28548 const struct insn_data_d *insn_p = &insn_data[icode];
28549 enum machine_mode tmode = insn_p->operand[0].mode;
28550 enum machine_mode rmode = VOIDmode;
28552 enum rtx_code comparison = d->comparison;
28554 switch ((enum ix86_builtin_func_type) d->flag)
28556 case V2DF_FTYPE_V2DF_ROUND:
28557 case V4DF_FTYPE_V4DF_ROUND:
28558 case V4SF_FTYPE_V4SF_ROUND:
28559 case V8SF_FTYPE_V8SF_ROUND:
28560 case V4SI_FTYPE_V4SF_ROUND:
28561 case V8SI_FTYPE_V8SF_ROUND:
28562 return ix86_expand_sse_round (d, exp, target);
28563 case V4SI_FTYPE_V2DF_V2DF_ROUND:
28564 case V8SI_FTYPE_V4DF_V4DF_ROUND:
28565 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
28566 case INT_FTYPE_V8SF_V8SF_PTEST:
28567 case INT_FTYPE_V4DI_V4DI_PTEST:
28568 case INT_FTYPE_V4DF_V4DF_PTEST:
28569 case INT_FTYPE_V4SF_V4SF_PTEST:
28570 case INT_FTYPE_V2DI_V2DI_PTEST:
28571 case INT_FTYPE_V2DF_V2DF_PTEST:
28572 return ix86_expand_sse_ptest (d, exp, target);
28573 case FLOAT128_FTYPE_FLOAT128:
28574 case FLOAT_FTYPE_FLOAT:
28575 case INT_FTYPE_INT:
28576 case UINT64_FTYPE_INT:
28577 case UINT16_FTYPE_UINT16:
28578 case INT64_FTYPE_INT64:
28579 case INT64_FTYPE_V4SF:
28580 case INT64_FTYPE_V2DF:
28581 case INT_FTYPE_V16QI:
28582 case INT_FTYPE_V8QI:
28583 case INT_FTYPE_V8SF:
28584 case INT_FTYPE_V4DF:
28585 case INT_FTYPE_V4SF:
28586 case INT_FTYPE_V2DF:
28587 case INT_FTYPE_V32QI:
28588 case V16QI_FTYPE_V16QI:
28589 case V8SI_FTYPE_V8SF:
28590 case V8SI_FTYPE_V4SI:
28591 case V8HI_FTYPE_V8HI:
28592 case V8HI_FTYPE_V16QI:
28593 case V8QI_FTYPE_V8QI:
28594 case V8SF_FTYPE_V8SF:
28595 case V8SF_FTYPE_V8SI:
28596 case V8SF_FTYPE_V4SF:
28597 case V8SF_FTYPE_V8HI:
28598 case V4SI_FTYPE_V4SI:
28599 case V4SI_FTYPE_V16QI:
28600 case V4SI_FTYPE_V4SF:
28601 case V4SI_FTYPE_V8SI:
28602 case V4SI_FTYPE_V8HI:
28603 case V4SI_FTYPE_V4DF:
28604 case V4SI_FTYPE_V2DF:
28605 case V4HI_FTYPE_V4HI:
28606 case V4DF_FTYPE_V4DF:
28607 case V4DF_FTYPE_V4SI:
28608 case V4DF_FTYPE_V4SF:
28609 case V4DF_FTYPE_V2DF:
28610 case V4SF_FTYPE_V4SF:
28611 case V4SF_FTYPE_V4SI:
28612 case V4SF_FTYPE_V8SF:
28613 case V4SF_FTYPE_V4DF:
28614 case V4SF_FTYPE_V8HI:
28615 case V4SF_FTYPE_V2DF:
28616 case V2DI_FTYPE_V2DI:
28617 case V2DI_FTYPE_V16QI:
28618 case V2DI_FTYPE_V8HI:
28619 case V2DI_FTYPE_V4SI:
28620 case V2DF_FTYPE_V2DF:
28621 case V2DF_FTYPE_V4SI:
28622 case V2DF_FTYPE_V4DF:
28623 case V2DF_FTYPE_V4SF:
28624 case V2DF_FTYPE_V2SI:
28625 case V2SI_FTYPE_V2SI:
28626 case V2SI_FTYPE_V4SF:
28627 case V2SI_FTYPE_V2SF:
28628 case V2SI_FTYPE_V2DF:
28629 case V2SF_FTYPE_V2SF:
28630 case V2SF_FTYPE_V2SI:
28631 case V32QI_FTYPE_V32QI:
28632 case V32QI_FTYPE_V16QI:
28633 case V16HI_FTYPE_V16HI:
28634 case V16HI_FTYPE_V8HI:
28635 case V8SI_FTYPE_V8SI:
28636 case V16HI_FTYPE_V16QI:
28637 case V8SI_FTYPE_V16QI:
28638 case V4DI_FTYPE_V16QI:
28639 case V8SI_FTYPE_V8HI:
28640 case V4DI_FTYPE_V8HI:
28641 case V4DI_FTYPE_V4SI:
28642 case V4DI_FTYPE_V2DI:
28645 case V4SF_FTYPE_V4SF_VEC_MERGE:
28646 case V2DF_FTYPE_V2DF_VEC_MERGE:
28647 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
28648 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
28649 case V16QI_FTYPE_V16QI_V16QI:
28650 case V16QI_FTYPE_V8HI_V8HI:
28651 case V8QI_FTYPE_V8QI_V8QI:
28652 case V8QI_FTYPE_V4HI_V4HI:
28653 case V8HI_FTYPE_V8HI_V8HI:
28654 case V8HI_FTYPE_V16QI_V16QI:
28655 case V8HI_FTYPE_V4SI_V4SI:
28656 case V8SF_FTYPE_V8SF_V8SF:
28657 case V8SF_FTYPE_V8SF_V8SI:
28658 case V4SI_FTYPE_V4SI_V4SI:
28659 case V4SI_FTYPE_V8HI_V8HI:
28660 case V4SI_FTYPE_V4SF_V4SF:
28661 case V4SI_FTYPE_V2DF_V2DF:
28662 case V4HI_FTYPE_V4HI_V4HI:
28663 case V4HI_FTYPE_V8QI_V8QI:
28664 case V4HI_FTYPE_V2SI_V2SI:
28665 case V4DF_FTYPE_V4DF_V4DF:
28666 case V4DF_FTYPE_V4DF_V4DI:
28667 case V4SF_FTYPE_V4SF_V4SF:
28668 case V4SF_FTYPE_V4SF_V4SI:
28669 case V4SF_FTYPE_V4SF_V2SI:
28670 case V4SF_FTYPE_V4SF_V2DF:
28671 case V4SF_FTYPE_V4SF_DI:
28672 case V4SF_FTYPE_V4SF_SI:
28673 case V2DI_FTYPE_V2DI_V2DI:
28674 case V2DI_FTYPE_V16QI_V16QI:
28675 case V2DI_FTYPE_V4SI_V4SI:
28676 case V2DI_FTYPE_V2DI_V16QI:
28677 case V2DI_FTYPE_V2DF_V2DF:
28678 case V2SI_FTYPE_V2SI_V2SI:
28679 case V2SI_FTYPE_V4HI_V4HI:
28680 case V2SI_FTYPE_V2SF_V2SF:
28681 case V2DF_FTYPE_V2DF_V2DF:
28682 case V2DF_FTYPE_V2DF_V4SF:
28683 case V2DF_FTYPE_V2DF_V2DI:
28684 case V2DF_FTYPE_V2DF_DI:
28685 case V2DF_FTYPE_V2DF_SI:
28686 case V2SF_FTYPE_V2SF_V2SF:
28687 case V1DI_FTYPE_V1DI_V1DI:
28688 case V1DI_FTYPE_V8QI_V8QI:
28689 case V1DI_FTYPE_V2SI_V2SI:
28690 case V32QI_FTYPE_V16HI_V16HI:
28691 case V16HI_FTYPE_V8SI_V8SI:
28692 case V32QI_FTYPE_V32QI_V32QI:
28693 case V16HI_FTYPE_V32QI_V32QI:
28694 case V16HI_FTYPE_V16HI_V16HI:
28695 case V8SI_FTYPE_V4DF_V4DF:
28696 case V8SI_FTYPE_V8SI_V8SI:
28697 case V8SI_FTYPE_V16HI_V16HI:
28698 case V4DI_FTYPE_V4DI_V4DI:
28699 case V4DI_FTYPE_V8SI_V8SI:
28700 if (comparison == UNKNOWN)
28701 return ix86_expand_binop_builtin (icode, exp, target);
28704 case V4SF_FTYPE_V4SF_V4SF_SWAP:
28705 case V2DF_FTYPE_V2DF_V2DF_SWAP:
28706 gcc_assert (comparison != UNKNOWN);
28710 case V16HI_FTYPE_V16HI_V8HI_COUNT:
28711 case V16HI_FTYPE_V16HI_SI_COUNT:
28712 case V8SI_FTYPE_V8SI_V4SI_COUNT:
28713 case V8SI_FTYPE_V8SI_SI_COUNT:
28714 case V4DI_FTYPE_V4DI_V2DI_COUNT:
28715 case V4DI_FTYPE_V4DI_INT_COUNT:
28716 case V8HI_FTYPE_V8HI_V8HI_COUNT:
28717 case V8HI_FTYPE_V8HI_SI_COUNT:
28718 case V4SI_FTYPE_V4SI_V4SI_COUNT:
28719 case V4SI_FTYPE_V4SI_SI_COUNT:
28720 case V4HI_FTYPE_V4HI_V4HI_COUNT:
28721 case V4HI_FTYPE_V4HI_SI_COUNT:
28722 case V2DI_FTYPE_V2DI_V2DI_COUNT:
28723 case V2DI_FTYPE_V2DI_SI_COUNT:
28724 case V2SI_FTYPE_V2SI_V2SI_COUNT:
28725 case V2SI_FTYPE_V2SI_SI_COUNT:
28726 case V1DI_FTYPE_V1DI_V1DI_COUNT:
28727 case V1DI_FTYPE_V1DI_SI_COUNT:
28729 last_arg_count = true;
28731 case UINT64_FTYPE_UINT64_UINT64:
28732 case UINT_FTYPE_UINT_UINT:
28733 case UINT_FTYPE_UINT_USHORT:
28734 case UINT_FTYPE_UINT_UCHAR:
28735 case UINT16_FTYPE_UINT16_INT:
28736 case UINT8_FTYPE_UINT8_INT:
28739 case V2DI_FTYPE_V2DI_INT_CONVERT:
28742 nargs_constant = 1;
28744 case V4DI_FTYPE_V4DI_INT_CONVERT:
28747 nargs_constant = 1;
28749 case V8HI_FTYPE_V8HI_INT:
28750 case V8HI_FTYPE_V8SF_INT:
28751 case V8HI_FTYPE_V4SF_INT:
28752 case V8SF_FTYPE_V8SF_INT:
28753 case V4SI_FTYPE_V4SI_INT:
28754 case V4SI_FTYPE_V8SI_INT:
28755 case V4HI_FTYPE_V4HI_INT:
28756 case V4DF_FTYPE_V4DF_INT:
28757 case V4SF_FTYPE_V4SF_INT:
28758 case V4SF_FTYPE_V8SF_INT:
28759 case V2DI_FTYPE_V2DI_INT:
28760 case V2DF_FTYPE_V2DF_INT:
28761 case V2DF_FTYPE_V4DF_INT:
28762 case V16HI_FTYPE_V16HI_INT:
28763 case V8SI_FTYPE_V8SI_INT:
28764 case V4DI_FTYPE_V4DI_INT:
28765 case V2DI_FTYPE_V4DI_INT:
28767 nargs_constant = 1;
28769 case V16QI_FTYPE_V16QI_V16QI_V16QI:
28770 case V8SF_FTYPE_V8SF_V8SF_V8SF:
28771 case V4DF_FTYPE_V4DF_V4DF_V4DF:
28772 case V4SF_FTYPE_V4SF_V4SF_V4SF:
28773 case V2DF_FTYPE_V2DF_V2DF_V2DF:
28774 case V32QI_FTYPE_V32QI_V32QI_V32QI:
28777 case V32QI_FTYPE_V32QI_V32QI_INT:
28778 case V16HI_FTYPE_V16HI_V16HI_INT:
28779 case V16QI_FTYPE_V16QI_V16QI_INT:
28780 case V4DI_FTYPE_V4DI_V4DI_INT:
28781 case V8HI_FTYPE_V8HI_V8HI_INT:
28782 case V8SI_FTYPE_V8SI_V8SI_INT:
28783 case V8SI_FTYPE_V8SI_V4SI_INT:
28784 case V8SF_FTYPE_V8SF_V8SF_INT:
28785 case V8SF_FTYPE_V8SF_V4SF_INT:
28786 case V4SI_FTYPE_V4SI_V4SI_INT:
28787 case V4DF_FTYPE_V4DF_V4DF_INT:
28788 case V4DF_FTYPE_V4DF_V2DF_INT:
28789 case V4SF_FTYPE_V4SF_V4SF_INT:
28790 case V2DI_FTYPE_V2DI_V2DI_INT:
28791 case V4DI_FTYPE_V4DI_V2DI_INT:
28792 case V2DF_FTYPE_V2DF_V2DF_INT:
28794 nargs_constant = 1;
28796 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
28799 nargs_constant = 1;
28801 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
28804 nargs_constant = 1;
28806 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
28809 nargs_constant = 1;
28811 case V2DI_FTYPE_V2DI_UINT_UINT:
28813 nargs_constant = 2;
28815 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
28816 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
28817 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
28818 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
28820 nargs_constant = 1;
28822 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
28824 nargs_constant = 2;
28827 gcc_unreachable ();
28830 gcc_assert (nargs <= ARRAY_SIZE (args));
28832 if (comparison != UNKNOWN)
28834 gcc_assert (nargs == 2);
28835 return ix86_expand_sse_compare (d, exp, target, swap);
28838 if (rmode == VOIDmode || rmode == tmode)
28842 || GET_MODE (target) != tmode
28843 || !insn_p->operand[0].predicate (target, tmode))
28844 target = gen_reg_rtx (tmode);
28845 real_target = target;
28849 target = gen_reg_rtx (rmode);
28850 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
28853 for (i = 0; i < nargs; i++)
28855 tree arg = CALL_EXPR_ARG (exp, i);
28856 rtx op = expand_normal (arg);
28857 enum machine_mode mode = insn_p->operand[i + 1].mode;
28858 bool match = insn_p->operand[i + 1].predicate (op, mode);
28860 if (last_arg_count && (i + 1) == nargs)
28862 /* SIMD shift insns take either an 8-bit immediate or a register
28863 as the count, but the builtin functions take an int. If the
28864 count doesn't match, we put it in a register. */
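/* For example (illustrative, not from the original source): with a
   non-constant count, _mm_slli_epi16 (v, n) cannot use the
   immediate form of psllw, so the count is narrowed to SImode here
   and, if need be, copied into a register. */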
28867 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
28868 if (!insn_p->operand[i + 1].predicate (op, mode))
28869 op = copy_to_reg (op);
28872 else if ((nargs - i) <= nargs_constant)
28877 case CODE_FOR_avx2_inserti128:
28878 case CODE_FOR_avx2_extracti128:
28879 error ("the last argument must be an 1-bit immediate");
28882 case CODE_FOR_sse4_1_roundsd:
28883 case CODE_FOR_sse4_1_roundss:
28885 case CODE_FOR_sse4_1_roundpd:
28886 case CODE_FOR_sse4_1_roundps:
28887 case CODE_FOR_avx_roundpd256:
28888 case CODE_FOR_avx_roundps256:
28890 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
28891 case CODE_FOR_sse4_1_roundps_sfix:
28892 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
28893 case CODE_FOR_avx_roundps_sfix256:
28895 case CODE_FOR_sse4_1_blendps:
28896 case CODE_FOR_avx_blendpd256:
28897 case CODE_FOR_avx_vpermilv4df:
28898 error ("the last argument must be a 4-bit immediate");
28901 case CODE_FOR_sse4_1_blendpd:
28902 case CODE_FOR_avx_vpermilv2df:
28903 case CODE_FOR_xop_vpermil2v2df3:
28904 case CODE_FOR_xop_vpermil2v4sf3:
28905 case CODE_FOR_xop_vpermil2v4df3:
28906 case CODE_FOR_xop_vpermil2v8sf3:
28907 error ("the last argument must be a 2-bit immediate");
28910 case CODE_FOR_avx_vextractf128v4df:
28911 case CODE_FOR_avx_vextractf128v8sf:
28912 case CODE_FOR_avx_vextractf128v8si:
28913 case CODE_FOR_avx_vinsertf128v4df:
28914 case CODE_FOR_avx_vinsertf128v8sf:
28915 case CODE_FOR_avx_vinsertf128v8si:
28916 error ("the last argument must be a 1-bit immediate");
28919 case CODE_FOR_avx_vmcmpv2df3:
28920 case CODE_FOR_avx_vmcmpv4sf3:
28921 case CODE_FOR_avx_cmpv2df3:
28922 case CODE_FOR_avx_cmpv4sf3:
28923 case CODE_FOR_avx_cmpv4df3:
28924 case CODE_FOR_avx_cmpv8sf3:
28925 error ("the last argument must be a 5-bit immediate");
28929 switch (nargs_constant)
28932 if ((nargs - i) == nargs_constant)
28934 error ("the next to last argument must be an 8-bit immediate");
28938 error ("the last argument must be an 8-bit immediate");
28941 gcc_unreachable ();
28948 if (VECTOR_MODE_P (mode))
28949 op = safe_vector_operand (op, mode);
28951 /* If we aren't optimizing, only allow one memory operand to be generated. */
28953 if (memory_operand (op, mode))
28956 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
28958 if (optimize || !match || num_memory > 1)
28959 op = copy_to_mode_reg (mode, op);
28963 op = copy_to_reg (op);
28964 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
28969 args[i].mode = mode;
28975 pat = GEN_FCN (icode) (real_target, args[0].op);
28978 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
28981 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
28985 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
28986 args[2].op, args[3].op);
28989 gcc_unreachable ();
28999 /* Subroutine of ix86_expand_builtin to take care of special insns
29000 with a variable number of operands. */
29003 ix86_expand_special_args_builtin (const struct builtin_description *d,
29004 tree exp, rtx target)
29008 unsigned int i, nargs, arg_adjust, memory;
29012 enum machine_mode mode;
29014 enum insn_code icode = d->icode;
29015 bool last_arg_constant = false;
29016 const struct insn_data_d *insn_p = &insn_data[icode];
29017 enum machine_mode tmode = insn_p->operand[0].mode;
29018 enum { load, store } klass;
29020 switch ((enum ix86_builtin_func_type) d->flag)
29022 case VOID_FTYPE_VOID:
29023 if (icode == CODE_FOR_avx_vzeroupper)
29024 target = GEN_INT (vzeroupper_intrinsic);
29025 emit_insn (GEN_FCN (icode) (target));
29027 case VOID_FTYPE_UINT64:
29028 case VOID_FTYPE_UNSIGNED:
29033 case UINT64_FTYPE_VOID:
29034 case UNSIGNED_FTYPE_VOID:
29039 case UINT64_FTYPE_PUNSIGNED:
29040 case V2DI_FTYPE_PV2DI:
29041 case V4DI_FTYPE_PV4DI:
29042 case V32QI_FTYPE_PCCHAR:
29043 case V16QI_FTYPE_PCCHAR:
29044 case V8SF_FTYPE_PCV4SF:
29045 case V8SF_FTYPE_PCFLOAT:
29046 case V4SF_FTYPE_PCFLOAT:
29047 case V4DF_FTYPE_PCV2DF:
29048 case V4DF_FTYPE_PCDOUBLE:
29049 case V2DF_FTYPE_PCDOUBLE:
29050 case VOID_FTYPE_PVOID:
29055 case VOID_FTYPE_PV2SF_V4SF:
29056 case VOID_FTYPE_PV4DI_V4DI:
29057 case VOID_FTYPE_PV2DI_V2DI:
29058 case VOID_FTYPE_PCHAR_V32QI:
29059 case VOID_FTYPE_PCHAR_V16QI:
29060 case VOID_FTYPE_PFLOAT_V8SF:
29061 case VOID_FTYPE_PFLOAT_V4SF:
29062 case VOID_FTYPE_PDOUBLE_V4DF:
29063 case VOID_FTYPE_PDOUBLE_V2DF:
29064 case VOID_FTYPE_PLONGLONG_LONGLONG:
29065 case VOID_FTYPE_PULONGLONG_ULONGLONG:
29066 case VOID_FTYPE_PINT_INT:
29069 /* Reserve memory operand for target. */
29070 memory = ARRAY_SIZE (args);
29072 case V4SF_FTYPE_V4SF_PCV2SF:
29073 case V2DF_FTYPE_V2DF_PCDOUBLE:
29078 case V8SF_FTYPE_PCV8SF_V8SI:
29079 case V4DF_FTYPE_PCV4DF_V4DI:
29080 case V4SF_FTYPE_PCV4SF_V4SI:
29081 case V2DF_FTYPE_PCV2DF_V2DI:
29082 case V8SI_FTYPE_PCV8SI_V8SI:
29083 case V4DI_FTYPE_PCV4DI_V4DI:
29084 case V4SI_FTYPE_PCV4SI_V4SI:
29085 case V2DI_FTYPE_PCV2DI_V2DI:
29090 case VOID_FTYPE_PV8SF_V8SI_V8SF:
29091 case VOID_FTYPE_PV4DF_V4DI_V4DF:
29092 case VOID_FTYPE_PV4SF_V4SI_V4SF:
29093 case VOID_FTYPE_PV2DF_V2DI_V2DF:
29094 case VOID_FTYPE_PV8SI_V8SI_V8SI:
29095 case VOID_FTYPE_PV4DI_V4DI_V4DI:
29096 case VOID_FTYPE_PV4SI_V4SI_V4SI:
29097 case VOID_FTYPE_PV2DI_V2DI_V2DI:
29100 /* Reserve memory operand for target. */
29101 memory = ARRAY_SIZE (args);
29103 case VOID_FTYPE_UINT_UINT_UINT:
29104 case VOID_FTYPE_UINT64_UINT_UINT:
29105 case UCHAR_FTYPE_UINT_UINT_UINT:
29106 case UCHAR_FTYPE_UINT64_UINT_UINT:
29109 memory = ARRAY_SIZE (args);
29110 last_arg_constant = true;
29113 gcc_unreachable ();
29116 gcc_assert (nargs <= ARRAY_SIZE (args));
29118 if (klass == store)
29120 arg = CALL_EXPR_ARG (exp, 0);
29121 op = expand_normal (arg);
29122 gcc_assert (target == 0);
29125 if (GET_MODE (op) != Pmode)
29126 op = convert_to_mode (Pmode, op, 1);
29127 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
29130 target = force_reg (tmode, op);
29138 || !register_operand (target, tmode)
29139 || GET_MODE (target) != tmode)
29140 target = gen_reg_rtx (tmode);
29143 for (i = 0; i < nargs; i++)
29145 enum machine_mode mode = insn_p->operand[i + 1].mode;
29148 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
29149 op = expand_normal (arg);
29150 match = insn_p->operand[i + 1].predicate (op, mode);
29152 if (last_arg_constant && (i + 1) == nargs)
29156 if (icode == CODE_FOR_lwp_lwpvalsi3
29157 || icode == CODE_FOR_lwp_lwpinssi3
29158 || icode == CODE_FOR_lwp_lwpvaldi3
29159 || icode == CODE_FOR_lwp_lwpinsdi3)
29160 error ("the last argument must be a 32-bit immediate");
29162 error ("the last argument must be an 8-bit immediate");
29170 /* This must be the memory operand. */
29171 if (GET_MODE (op) != Pmode)
29172 op = convert_to_mode (Pmode, op, 1);
29173 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
29174 gcc_assert (GET_MODE (op) == mode
29175 || GET_MODE (op) == VOIDmode);
29179 /* This must be a register. */
29180 if (VECTOR_MODE_P (mode))
29181 op = safe_vector_operand (op, mode);
29183 gcc_assert (GET_MODE (op) == mode
29184 || GET_MODE (op) == VOIDmode);
29185 op = copy_to_mode_reg (mode, op);
29190 args[i].mode = mode;
29196 pat = GEN_FCN (icode) (target);
29199 pat = GEN_FCN (icode) (target, args[0].op);
29202 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
29205 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
29208 gcc_unreachable ();
29214 return klass == store ? 0 : target;
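/* Illustrative sketch (not part of the original source): a
   store-class special builtin consumes its first argument as the
   destination address, so the expander above returns 0, e.g.

     #include <emmintrin.h>

     void
     save (double *p, __m128d x)
     {
       _mm_storeu_pd (p, x);
     }

   Here TARGET becomes the MEM built from P, and X supplies
   args[0]. */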
29217 /* Return the integer constant in ARG. Constrain it to be in the range
29218 of the subparts of VEC_TYPE; issue an error if not. */
29221 get_element_number (tree vec_type, tree arg)
29223 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
29225 if (!host_integerp (arg, 1)
29226 || (elt = tree_low_cst (arg, 1), elt > max))
29228 error ("selector must be an integer constant in the range 0..%wi", max);
29235 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29236 ix86_expand_vector_init. We DO have language-level syntax for this, in
29237 the form of (type){ init-list }. Except that since we can't place emms
29238 instructions from inside the compiler, we can't allow the use of MMX
29239 registers unless the user explicitly asks for it. So we do *not* define
29240 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
29241 we have builtins invoked by mmintrin.h that give us license to emit
29242 these sorts of instructions. */
29245 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
29247 enum machine_mode tmode = TYPE_MODE (type);
29248 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
29249 int i, n_elt = GET_MODE_NUNITS (tmode);
29250 rtvec v = rtvec_alloc (n_elt);
29252 gcc_assert (VECTOR_MODE_P (tmode));
29253 gcc_assert (call_expr_nargs (exp) == n_elt);
29255 for (i = 0; i < n_elt; ++i)
29257 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
29258 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
29261 if (!target || !register_operand (target, tmode))
29262 target = gen_reg_rtx (tmode);
29264 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
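/* Illustrative sketch (not part of the original source): the MMX
   init builtins give mmintrin.h a way to construct vectors without
   the compiler choosing MMX registers on its own, e.g.

     #include <mmintrin.h>

     __m64
     pair (int a, int b)
     {
       return _mm_set_pi32 (b, a);
     }

   which funnels through __builtin_ia32_vec_init_v2si and the
   expander above. */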
29268 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29269 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
29270 had a language-level syntax for referencing vector elements. */
29273 ix86_expand_vec_ext_builtin (tree exp, rtx target)
29275 enum machine_mode tmode, mode0;
29280 arg0 = CALL_EXPR_ARG (exp, 0);
29281 arg1 = CALL_EXPR_ARG (exp, 1);
29283 op0 = expand_normal (arg0);
29284 elt = get_element_number (TREE_TYPE (arg0), arg1);
29286 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
29287 mode0 = TYPE_MODE (TREE_TYPE (arg0));
29288 gcc_assert (VECTOR_MODE_P (mode0));
29290 op0 = force_reg (mode0, op0);
29292 if (optimize || !target || !register_operand (target, tmode))
29293 target = gen_reg_rtx (tmode);
29295 ix86_expand_vector_extract (true, target, op0, elt);
29300 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29301 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
29302 a language-level syntax for referencing vector elements. */
29305 ix86_expand_vec_set_builtin (tree exp)
29307 enum machine_mode tmode, mode1;
29308 tree arg0, arg1, arg2;
29310 rtx op0, op1, target;
29312 arg0 = CALL_EXPR_ARG (exp, 0);
29313 arg1 = CALL_EXPR_ARG (exp, 1);
29314 arg2 = CALL_EXPR_ARG (exp, 2);
29316 tmode = TYPE_MODE (TREE_TYPE (arg0));
29317 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
29318 gcc_assert (VECTOR_MODE_P (tmode));
29320 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
29321 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
29322 elt = get_element_number (TREE_TYPE (arg0), arg2);
29324 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
29325 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
29327 op0 = force_reg (tmode, op0);
29328 op1 = force_reg (mode1, op1);
29330 /* OP0 is the source of these builtin functions and shouldn't be
29331 modified. Create a copy, use it and return it as target. */
29332 target = gen_reg_rtx (tmode);
29333 emit_move_insn (target, op0);
29334 ix86_expand_vector_set (true, target, op1, elt);
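/* Illustrative sketch (not part of the original source): because the
   expander copies OP0 first, the intrinsic is non-destructive, e.g.

     #include <emmintrin.h>

     __m128i
     put3 (__m128i v, int x)
     {
       return _mm_insert_epi16 (v, x, 3);
     }

   returns a modified copy while V itself is left untouched. */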
29339 /* Expand an expression EXP that calls a built-in function,
29340 with result going to TARGET if that's convenient
29341 (and in mode MODE if that's convenient).
29342 SUBTARGET may be used as the target for computing one of EXP's operands.
29343 IGNORE is nonzero if the value is to be ignored. */
29346 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
29347 enum machine_mode mode ATTRIBUTE_UNUSED,
29348 int ignore ATTRIBUTE_UNUSED)
29350 const struct builtin_description *d;
29352 enum insn_code icode;
29353 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
29354 tree arg0, arg1, arg2, arg3, arg4;
29355 rtx op0, op1, op2, op3, op4, pat;
29356 enum machine_mode mode0, mode1, mode2, mode3, mode4;
29357 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
29359 /* Determine whether the builtin function is available under the current ISA.
29360 Originally the builtin was not created if it wasn't applicable to the
29361 current ISA based on the command-line switches. With function-specific
29362 options, we need to check in the context of the function making the call
29363 whether it is supported. */
29364 if (ix86_builtins_isa[fcode].isa
29365 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
29367 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
29368 NULL, (enum fpmath_unit) 0, false);
29371 error ("%qE needs unknown isa option", fndecl);
29374 gcc_assert (opts != NULL);
29375 error ("%qE needs isa option %s", fndecl, opts);
29383 case IX86_BUILTIN_MASKMOVQ:
29384 case IX86_BUILTIN_MASKMOVDQU:
29385 icode = (fcode == IX86_BUILTIN_MASKMOVQ
29386 ? CODE_FOR_mmx_maskmovq
29387 : CODE_FOR_sse2_maskmovdqu);
29388 /* Note the arg order is different from the operand order. */
29389 arg1 = CALL_EXPR_ARG (exp, 0);
29390 arg2 = CALL_EXPR_ARG (exp, 1);
29391 arg0 = CALL_EXPR_ARG (exp, 2);
29392 op0 = expand_normal (arg0);
29393 op1 = expand_normal (arg1);
29394 op2 = expand_normal (arg2);
29395 mode0 = insn_data[icode].operand[0].mode;
29396 mode1 = insn_data[icode].operand[1].mode;
29397 mode2 = insn_data[icode].operand[2].mode;
29399 if (GET_MODE (op0) != Pmode)
29400 op0 = convert_to_mode (Pmode, op0, 1);
29401 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
29403 if (!insn_data[icode].operand[0].predicate (op0, mode0))
29404 op0 = copy_to_mode_reg (mode0, op0);
29405 if (!insn_data[icode].operand[1].predicate (op1, mode1))
29406 op1 = copy_to_mode_reg (mode1, op1);
29407 if (!insn_data[icode].operand[2].predicate (op2, mode2))
29408 op2 = copy_to_mode_reg (mode2, op2);
29409 pat = GEN_FCN (icode) (op0, op1, op2);
29415 case IX86_BUILTIN_LDMXCSR:
29416 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
29417 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
29418 emit_move_insn (target, op0);
29419 emit_insn (gen_sse_ldmxcsr (target));
29422 case IX86_BUILTIN_STMXCSR:
29423 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
29424 emit_insn (gen_sse_stmxcsr (target));
29425 return copy_to_mode_reg (SImode, target);
29427 case IX86_BUILTIN_CLFLUSH:
29428 arg0 = CALL_EXPR_ARG (exp, 0);
29429 op0 = expand_normal (arg0);
29430 icode = CODE_FOR_sse2_clflush;
29431 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
29433 if (GET_MODE (op0) != Pmode)
29434 op0 = convert_to_mode (Pmode, op0, 1);
29435 op0 = force_reg (Pmode, op0);
29438 emit_insn (gen_sse2_clflush (op0));
29441 case IX86_BUILTIN_MONITOR:
29442 arg0 = CALL_EXPR_ARG (exp, 0);
29443 arg1 = CALL_EXPR_ARG (exp, 1);
29444 arg2 = CALL_EXPR_ARG (exp, 2);
29445 op0 = expand_normal (arg0);
29446 op1 = expand_normal (arg1);
29447 op2 = expand_normal (arg2);
29450 if (GET_MODE (op0) != Pmode)
29451 op0 = convert_to_mode (Pmode, op0, 1);
29452 op0 = force_reg (Pmode, op0);
29455 op1 = copy_to_mode_reg (SImode, op1);
29457 op2 = copy_to_mode_reg (SImode, op2);
29458 emit_insn (ix86_gen_monitor (op0, op1, op2));
29461 case IX86_BUILTIN_MWAIT:
29462 arg0 = CALL_EXPR_ARG (exp, 0);
29463 arg1 = CALL_EXPR_ARG (exp, 1);
29464 op0 = expand_normal (arg0);
29465 op1 = expand_normal (arg1);
29467 op0 = copy_to_mode_reg (SImode, op0);
29469 op1 = copy_to_mode_reg (SImode, op1);
29470 emit_insn (gen_sse3_mwait (op0, op1));
29473 case IX86_BUILTIN_VEC_INIT_V2SI:
29474 case IX86_BUILTIN_VEC_INIT_V4HI:
29475 case IX86_BUILTIN_VEC_INIT_V8QI:
29476 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
29478 case IX86_BUILTIN_VEC_EXT_V2DF:
29479 case IX86_BUILTIN_VEC_EXT_V2DI:
29480 case IX86_BUILTIN_VEC_EXT_V4SF:
29481 case IX86_BUILTIN_VEC_EXT_V4SI:
29482 case IX86_BUILTIN_VEC_EXT_V8HI:
29483 case IX86_BUILTIN_VEC_EXT_V2SI:
29484 case IX86_BUILTIN_VEC_EXT_V4HI:
29485 case IX86_BUILTIN_VEC_EXT_V16QI:
29486 return ix86_expand_vec_ext_builtin (exp, target);
29488 case IX86_BUILTIN_VEC_SET_V2DI:
29489 case IX86_BUILTIN_VEC_SET_V4SF:
29490 case IX86_BUILTIN_VEC_SET_V4SI:
29491 case IX86_BUILTIN_VEC_SET_V8HI:
29492 case IX86_BUILTIN_VEC_SET_V4HI:
29493 case IX86_BUILTIN_VEC_SET_V16QI:
29494 return ix86_expand_vec_set_builtin (exp);
29496 case IX86_BUILTIN_INFQ:
29497 case IX86_BUILTIN_HUGE_VALQ:
29499 REAL_VALUE_TYPE inf;
29503 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
29505 tmp = validize_mem (force_const_mem (mode, tmp));
29508 target = gen_reg_rtx (mode);
29510 emit_move_insn (target, tmp);
29514 case IX86_BUILTIN_LLWPCB:
29515 arg0 = CALL_EXPR_ARG (exp, 0);
29516 op0 = expand_normal (arg0);
29517 icode = CODE_FOR_lwp_llwpcb;
29518 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
29520 if (GET_MODE (op0) != Pmode)
29521 op0 = convert_to_mode (Pmode, op0, 1);
29522 op0 = force_reg (Pmode, op0);
29524 emit_insn (gen_lwp_llwpcb (op0));
29527 case IX86_BUILTIN_SLWPCB:
29528 icode = CODE_FOR_lwp_slwpcb;
29530 || !insn_data[icode].operand[0].predicate (target, Pmode))
29531 target = gen_reg_rtx (Pmode);
29532 emit_insn (gen_lwp_slwpcb (target));
29535 case IX86_BUILTIN_BEXTRI32:
29536 case IX86_BUILTIN_BEXTRI64:
29537 arg0 = CALL_EXPR_ARG (exp, 0);
29538 arg1 = CALL_EXPR_ARG (exp, 1);
29539 op0 = expand_normal (arg0);
29540 op1 = expand_normal (arg1);
29541 icode = (fcode == IX86_BUILTIN_BEXTRI32
29542 ? CODE_FOR_tbm_bextri_si
29543 : CODE_FOR_tbm_bextri_di);
29544 if (!CONST_INT_P (op1))
29546 error ("last argument must be an immediate");
29551 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
29552 unsigned char lsb_index = INTVAL (op1) & 0xFF;
29553 op1 = GEN_INT (length);
29554 op2 = GEN_INT (lsb_index);
29555 pat = GEN_FCN (icode) (target, op0, op1, op2);
29561 case IX86_BUILTIN_RDRAND16_STEP:
29562 icode = CODE_FOR_rdrandhi_1;
29566 case IX86_BUILTIN_RDRAND32_STEP:
29567 icode = CODE_FOR_rdrandsi_1;
29571 case IX86_BUILTIN_RDRAND64_STEP:
29572 icode = CODE_FOR_rdranddi_1;
29576 op0 = gen_reg_rtx (mode0);
29577 emit_insn (GEN_FCN (icode) (op0));
29579 arg0 = CALL_EXPR_ARG (exp, 0);
29580 op1 = expand_normal (arg0);
29581 if (!address_operand (op1, VOIDmode))
29583 op1 = convert_memory_address (Pmode, op1);
29584 op1 = copy_addr_to_reg (op1);
29586 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
29588 op1 = gen_reg_rtx (SImode);
29589 emit_move_insn (op1, CONST1_RTX (SImode));
29591 /* Emit an SImode conditional move. */
29592 if (mode0 == HImode)
29594 op2 = gen_reg_rtx (SImode);
29595 emit_insn (gen_zero_extendhisi2 (op2, op0));
29597 else if (mode0 == SImode)
29600 op2 = gen_rtx_SUBREG (SImode, op0, 0);
29603 target = gen_reg_rtx (SImode);
29605 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
29607 emit_insn (gen_rtx_SET (VOIDmode, target,
29608 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
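/* Illustrative sketch (not part of the original source): the *_step
   builtins surface the rdrand carry flag as an int, e.g.

     #include <immintrin.h>

     int
     get_random (unsigned int *r)
     {
       return _rdrand32_step (r);
     }

   The conditional move emitted above yields 1 when rdrand set CF
   (success) and 0 otherwise. */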
29611 case IX86_BUILTIN_GATHERSIV2DF:
29612 icode = CODE_FOR_avx2_gathersiv2df;
29614 case IX86_BUILTIN_GATHERSIV4DF:
29615 icode = CODE_FOR_avx2_gathersiv4df;
29617 case IX86_BUILTIN_GATHERDIV2DF:
29618 icode = CODE_FOR_avx2_gatherdiv2df;
29620 case IX86_BUILTIN_GATHERDIV4DF:
29621 icode = CODE_FOR_avx2_gatherdiv4df;
29623 case IX86_BUILTIN_GATHERSIV4SF:
29624 icode = CODE_FOR_avx2_gathersiv4sf;
29626 case IX86_BUILTIN_GATHERSIV8SF:
29627 icode = CODE_FOR_avx2_gathersiv8sf;
29629 case IX86_BUILTIN_GATHERDIV4SF:
29630 icode = CODE_FOR_avx2_gatherdiv4sf;
29632 case IX86_BUILTIN_GATHERDIV8SF:
29633 icode = CODE_FOR_avx2_gatherdiv8sf;
29635 case IX86_BUILTIN_GATHERSIV2DI:
29636 icode = CODE_FOR_avx2_gathersiv2di;
29638 case IX86_BUILTIN_GATHERSIV4DI:
29639 icode = CODE_FOR_avx2_gathersiv4di;
29641 case IX86_BUILTIN_GATHERDIV2DI:
29642 icode = CODE_FOR_avx2_gatherdiv2di;
29644 case IX86_BUILTIN_GATHERDIV4DI:
29645 icode = CODE_FOR_avx2_gatherdiv4di;
29647 case IX86_BUILTIN_GATHERSIV4SI:
29648 icode = CODE_FOR_avx2_gathersiv4si;
29650 case IX86_BUILTIN_GATHERSIV8SI:
29651 icode = CODE_FOR_avx2_gathersiv8si;
29653 case IX86_BUILTIN_GATHERDIV4SI:
29654 icode = CODE_FOR_avx2_gatherdiv4si;
29656 case IX86_BUILTIN_GATHERDIV8SI:
29657 icode = CODE_FOR_avx2_gatherdiv8si;
29659 case IX86_BUILTIN_GATHERALTSIV4DF:
29660 icode = CODE_FOR_avx2_gathersiv4df;
29662 case IX86_BUILTIN_GATHERALTDIV8SF:
29663 icode = CODE_FOR_avx2_gatherdiv8sf;
29665 case IX86_BUILTIN_GATHERALTSIV4DI:
29666 icode = CODE_FOR_avx2_gathersiv4di;
29668 case IX86_BUILTIN_GATHERALTDIV8SI:
29669 icode = CODE_FOR_avx2_gatherdiv8si;
29673 arg0 = CALL_EXPR_ARG (exp, 0);
29674 arg1 = CALL_EXPR_ARG (exp, 1);
29675 arg2 = CALL_EXPR_ARG (exp, 2);
29676 arg3 = CALL_EXPR_ARG (exp, 3);
29677 arg4 = CALL_EXPR_ARG (exp, 4);
29678 op0 = expand_normal (arg0);
29679 op1 = expand_normal (arg1);
29680 op2 = expand_normal (arg2);
29681 op3 = expand_normal (arg3);
29682 op4 = expand_normal (arg4);
29683 /* Note the arg order is different from the operand order. */
29684 mode0 = insn_data[icode].operand[1].mode;
29685 mode2 = insn_data[icode].operand[3].mode;
29686 mode3 = insn_data[icode].operand[4].mode;
29687 mode4 = insn_data[icode].operand[5].mode;
29689 if (target == NULL_RTX
29690 || GET_MODE (target) != insn_data[icode].operand[0].mode)
29691 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
29693 subtarget = target;
29695 if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
29696 || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
29698 rtx half = gen_reg_rtx (V4SImode);
29699 if (!nonimmediate_operand (op2, V8SImode))
29700 op2 = copy_to_mode_reg (V8SImode, op2);
29701 emit_insn (gen_vec_extract_lo_v8si (half, op2));
29704 else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
29705 || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
29707 rtx (*gen) (rtx, rtx);
29708 rtx half = gen_reg_rtx (mode0);
29709 if (mode0 == V4SFmode)
29710 gen = gen_vec_extract_lo_v8sf;
29712 gen = gen_vec_extract_lo_v8si;
29713 if (!nonimmediate_operand (op0, GET_MODE (op0)))
29714 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
29715 emit_insn (gen (half, op0));
29717 if (!nonimmediate_operand (op3, GET_MODE (op3)))
29718 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
29719 emit_insn (gen (half, op3));
29723 /* Force the memory operand to use only a base register here; we
29724 don't want to do that to memory operands for other builtin functions. */
29726 if (GET_MODE (op1) != Pmode)
29727 op1 = convert_to_mode (Pmode, op1, 1);
29728 op1 = force_reg (Pmode, op1);
29730 if (!insn_data[icode].operand[1].predicate (op0, mode0))
29731 op0 = copy_to_mode_reg (mode0, op0);
29732 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
29733 op1 = copy_to_mode_reg (Pmode, op1);
29734 if (!insn_data[icode].operand[3].predicate (op2, mode2))
29735 op2 = copy_to_mode_reg (mode2, op2);
29736 if (!insn_data[icode].operand[4].predicate (op3, mode3))
29737 op3 = copy_to_mode_reg (mode3, op3);
29738 if (!insn_data[icode].operand[5].predicate (op4, mode4))
29740 error ("last argument must be scale 1, 2, 4, 8");
29744 /* Optimize. If the mask is known to have all high bits set,
29745 replace op0 with pc_rtx to signal that the instruction
29746 overwrites the whole destination and doesn't use its
29747 previous contents. */
29750 if (TREE_CODE (arg3) == VECTOR_CST)
29753 unsigned int negative = 0;
29754 for (elt = TREE_VECTOR_CST_ELTS (arg3);
29755 elt; elt = TREE_CHAIN (elt))
29757 tree cst = TREE_VALUE (elt);
29758 if (TREE_CODE (cst) == INTEGER_CST
29759 && tree_int_cst_sign_bit (cst))
29761 else if (TREE_CODE (cst) == REAL_CST
29762 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
29765 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
29768 else if (TREE_CODE (arg3) == SSA_NAME)
29770 /* Recognize also when the mask is like:
29771 __v2df src = _mm_setzero_pd ();
29772 __v2df mask = _mm_cmpeq_pd (src, src);
29773 or
29774 __v8sf src = _mm256_setzero_ps ();
29775 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
29776 as that is a cheaper way to load all ones into
29777 a register than having to load a constant from memory. */
29779 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
29780 if (is_gimple_call (def_stmt))
29782 tree fndecl = gimple_call_fndecl (def_stmt);
29784 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
29785 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
29787 case IX86_BUILTIN_CMPPD:
29788 case IX86_BUILTIN_CMPPS:
29789 case IX86_BUILTIN_CMPPD256:
29790 case IX86_BUILTIN_CMPPS256:
29791 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
29794 case IX86_BUILTIN_CMPEQPD:
29795 case IX86_BUILTIN_CMPEQPS:
29796 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
29797 && initializer_zerop (gimple_call_arg (def_stmt,
29808 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
29813 if (fcode == IX86_BUILTIN_GATHERDIV8SF
29814 || fcode == IX86_BUILTIN_GATHERDIV8SI)
29816 enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
29817 ? V4SFmode : V4SImode;
29818 if (target == NULL_RTX)
29819 target = gen_reg_rtx (tmode);
29820 if (tmode == V4SFmode)
29821 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
29823 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
29826 target = subtarget;
29834 for (i = 0, d = bdesc_special_args;
29835 i < ARRAY_SIZE (bdesc_special_args);
29837 if (d->code == fcode)
29838 return ix86_expand_special_args_builtin (d, exp, target);
29840 for (i = 0, d = bdesc_args;
29841 i < ARRAY_SIZE (bdesc_args);
29843 if (d->code == fcode)
29846 case IX86_BUILTIN_FABSQ:
29847 case IX86_BUILTIN_COPYSIGNQ:
29849 /* Emit a normal call if SSE2 isn't available. */
29850 return expand_call (exp, target, ignore);
29852 return ix86_expand_args_builtin (d, exp, target);
29855 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
29856 if (d->code == fcode)
29857 return ix86_expand_sse_comi (d, exp, target);
29859 for (i = 0, d = bdesc_pcmpestr;
29860 i < ARRAY_SIZE (bdesc_pcmpestr);
29862 if (d->code == fcode)
29863 return ix86_expand_sse_pcmpestr (d, exp, target);
29865 for (i = 0, d = bdesc_pcmpistr;
29866 i < ARRAY_SIZE (bdesc_pcmpistr);
29868 if (d->code == fcode)
29869 return ix86_expand_sse_pcmpistr (d, exp, target);
29871 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
29872 if (d->code == fcode)
29873 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
29874 (enum ix86_builtin_func_type)
29875 d->flag, d->comparison);
29877 gcc_unreachable ();
29880 /* Returns a function decl for a vectorized version of the builtin function
29881 with builtin function code FN and the result vector type TYPE, or NULL_TREE
29882 if it is not available. */
29885 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
29888 enum machine_mode in_mode, out_mode;
29890 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29892 if (TREE_CODE (type_out) != VECTOR_TYPE
29893 || TREE_CODE (type_in) != VECTOR_TYPE
29894 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
29897 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29898 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29899 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29900 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29904 case BUILT_IN_SQRT:
29905 if (out_mode == DFmode && in_mode == DFmode)
29907 if (out_n == 2 && in_n == 2)
29908 return ix86_builtins[IX86_BUILTIN_SQRTPD];
29909 else if (out_n == 4 && in_n == 4)
29910 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
29914 case BUILT_IN_SQRTF:
29915 if (out_mode == SFmode && in_mode == SFmode)
29917 if (out_n == 4 && in_n == 4)
29918 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
29919 else if (out_n == 8 && in_n == 8)
29920 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
29924 case BUILT_IN_IFLOOR:
29925 case BUILT_IN_LFLOOR:
29926 case BUILT_IN_LLFLOOR:
29927 /* The round insn does not trap on denormals. */
29928 if (flag_trapping_math || !TARGET_ROUND)
29931 if (out_mode == SImode && in_mode == DFmode)
29933 if (out_n == 4 && in_n == 2)
29934 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
29935 else if (out_n == 8 && in_n == 4)
29936 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
29940 case BUILT_IN_IFLOORF:
29941 case BUILT_IN_LFLOORF:
29942 case BUILT_IN_LLFLOORF:
29943 /* The round insn does not trap on denormals. */
29944 if (flag_trapping_math || !TARGET_ROUND)
29947 if (out_mode == SImode && in_mode == SFmode)
29949 if (out_n == 4 && in_n == 4)
29950 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
29951 else if (out_n == 8 && in_n == 8)
29952 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
29956 case BUILT_IN_ICEIL:
29957 case BUILT_IN_LCEIL:
29958 case BUILT_IN_LLCEIL:
29959 /* The round insn does not trap on denormals. */
29960 if (flag_trapping_math || !TARGET_ROUND)
29963 if (out_mode == SImode && in_mode == DFmode)
29965 if (out_n == 4 && in_n == 2)
29966 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
29967 else if (out_n == 8 && in_n == 4)
29968 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
29972 case BUILT_IN_ICEILF:
29973 case BUILT_IN_LCEILF:
29974 case BUILT_IN_LLCEILF:
29975 /* The round insn does not trap on denormals. */
29976 if (flag_trapping_math || !TARGET_ROUND)
29979 if (out_mode == SImode && in_mode == SFmode)
29981 if (out_n == 4 && in_n == 4)
29982 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
29983 else if (out_n == 8 && in_n == 8)
29984 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
29988 case BUILT_IN_IRINT:
29989 case BUILT_IN_LRINT:
29990 case BUILT_IN_LLRINT:
29991 if (out_mode == SImode && in_mode == DFmode)
29993 if (out_n == 4 && in_n == 2)
29994 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
29995 else if (out_n == 8 && in_n == 4)
29996 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
30000 case BUILT_IN_IRINTF:
30001 case BUILT_IN_LRINTF:
30002 case BUILT_IN_LLRINTF:
30003 if (out_mode == SImode && in_mode == SFmode)
30005 if (out_n == 4 && in_n == 4)
30006 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
30007 else if (out_n == 8 && in_n == 8)
30008 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
30012 case BUILT_IN_IROUND:
30013 case BUILT_IN_LROUND:
30014 case BUILT_IN_LLROUND:
30015 /* The round insn does not trap on denormals. */
30016 if (flag_trapping_math || !TARGET_ROUND)
30019 if (out_mode == SImode && in_mode == DFmode)
30021 if (out_n == 4 && in_n == 2)
30022 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
30023 else if (out_n == 8 && in_n == 4)
30024 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
30028 case BUILT_IN_IROUNDF:
30029 case BUILT_IN_LROUNDF:
30030 case BUILT_IN_LLROUNDF:
30031 /* The round insn does not trap on denormals. */
30032 if (flag_trapping_math || !TARGET_ROUND)
30035 if (out_mode == SImode && in_mode == SFmode)
30037 if (out_n == 4 && in_n == 4)
30038 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
30039 else if (out_n == 8 && in_n == 8)
30040 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
30044 case BUILT_IN_COPYSIGN:
30045 if (out_mode == DFmode && in_mode == DFmode)
30047 if (out_n == 2 && in_n == 2)
30048 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
30049 else if (out_n == 4 && in_n == 4)
30050 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
30054 case BUILT_IN_COPYSIGNF:
30055 if (out_mode == SFmode && in_mode == SFmode)
30057 if (out_n == 4 && in_n == 4)
30058 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
30059 else if (out_n == 8 && in_n == 8)
30060 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
30064 case BUILT_IN_FLOOR:
30065 /* The round insn does not trap on denormals. */
30066 if (flag_trapping_math || !TARGET_ROUND)
30069 if (out_mode == DFmode && in_mode == DFmode)
30071 if (out_n == 2 && in_n == 2)
30072 return ix86_builtins[IX86_BUILTIN_FLOORPD];
30073 else if (out_n == 4 && in_n == 4)
30074 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
30078 case BUILT_IN_FLOORF:
30079 /* The round insn does not trap on denormals. */
30080 if (flag_trapping_math || !TARGET_ROUND)
30083 if (out_mode == SFmode && in_mode == SFmode)
30085 if (out_n == 4 && in_n == 4)
30086 return ix86_builtins[IX86_BUILTIN_FLOORPS];
30087 else if (out_n == 8 && in_n == 8)
30088 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
30092 case BUILT_IN_CEIL:
30093 /* The round insn does not trap on denormals. */
30094 if (flag_trapping_math || !TARGET_ROUND)
30097 if (out_mode == DFmode && in_mode == DFmode)
30099 if (out_n == 2 && in_n == 2)
30100 return ix86_builtins[IX86_BUILTIN_CEILPD];
30101 else if (out_n == 4 && in_n == 4)
30102 return ix86_builtins[IX86_BUILTIN_CEILPD256];
30106 case BUILT_IN_CEILF:
30107 /* The round insn does not trap on denormals. */
30108 if (flag_trapping_math || !TARGET_ROUND)
30111 if (out_mode == SFmode && in_mode == SFmode)
30113 if (out_n == 4 && in_n == 4)
30114 return ix86_builtins[IX86_BUILTIN_CEILPS];
30115 else if (out_n == 8 && in_n == 8)
30116 return ix86_builtins[IX86_BUILTIN_CEILPS256];
30120 case BUILT_IN_TRUNC:
30121 /* The round insn does not trap on denormals. */
30122 if (flag_trapping_math || !TARGET_ROUND)
30125 if (out_mode == DFmode && in_mode == DFmode)
30127 if (out_n == 2 && in_n == 2)
30128 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
30129 else if (out_n == 4 && in_n == 4)
30130 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
30134 case BUILT_IN_TRUNCF:
30135 /* The round insn does not trap on denormals. */
30136 if (flag_trapping_math || !TARGET_ROUND)
30139 if (out_mode == SFmode && in_mode == SFmode)
30141 if (out_n == 4 && in_n == 4)
30142 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
30143 else if (out_n == 8 && in_n == 8)
30144 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
30148 case BUILT_IN_RINT:
30149 /* The round insn does not trap on denormals. */
30150 if (flag_trapping_math || !TARGET_ROUND)
30153 if (out_mode == DFmode && in_mode == DFmode)
30155 if (out_n == 2 && in_n == 2)
30156 return ix86_builtins[IX86_BUILTIN_RINTPD];
30157 else if (out_n == 4 && in_n == 4)
30158 return ix86_builtins[IX86_BUILTIN_RINTPD256];
30162 case BUILT_IN_RINTF:
30163 /* The round insn does not trap on denormals. */
30164 if (flag_trapping_math || !TARGET_ROUND)
30167 if (out_mode == SFmode && in_mode == SFmode)
30169 if (out_n == 4 && in_n == 4)
30170 return ix86_builtins[IX86_BUILTIN_RINTPS];
30171 else if (out_n == 8 && in_n == 8)
30172 return ix86_builtins[IX86_BUILTIN_RINTPS256];
30176 case BUILT_IN_ROUND:
30177 /* The round insn does not trap on denormals. */
30178 if (flag_trapping_math || !TARGET_ROUND)
30181 if (out_mode == DFmode && in_mode == DFmode)
30183 if (out_n == 2 && in_n == 2)
30184 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
30185 else if (out_n == 4 && in_n == 4)
30186 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
30190 case BUILT_IN_ROUNDF:
30191 /* The round insn does not trap on denormals. */
30192 if (flag_trapping_math || !TARGET_ROUND)
30195 if (out_mode == SFmode && in_mode == SFmode)
30197 if (out_n == 4 && in_n == 4)
30198 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
30199 else if (out_n == 8 && in_n == 8)
30200 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
30205 if (out_mode == DFmode && in_mode == DFmode)
30207 if (out_n == 2 && in_n == 2)
30208 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
30209 if (out_n == 4 && in_n == 4)
30210 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
30214 case BUILT_IN_FMAF:
30215 if (out_mode == SFmode && in_mode == SFmode)
30217 if (out_n == 4 && in_n == 4)
30218 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
30219 if (out_n == 8 && in_n == 8)
30220 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
30228 /* Dispatch to a handler for a vectorization library. */
30229 if (ix86_veclib_handler)
30230 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
30236 /* Handler for an SVML-style interface to
30237 a library with vectorized intrinsics. */
30240 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
30243 tree fntype, new_fndecl, args;
30246 enum machine_mode el_mode, in_mode;
30249 /* The SVML library is suitable for unsafe math only. */
30250 if (!flag_unsafe_math_optimizations)
30253 el_mode = TYPE_MODE (TREE_TYPE (type_out));
30254 n = TYPE_VECTOR_SUBPARTS (type_out);
30255 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30256 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30257 if (el_mode != in_mode
30265 case BUILT_IN_LOG10:
30267 case BUILT_IN_TANH:
30269 case BUILT_IN_ATAN:
30270 case BUILT_IN_ATAN2:
30271 case BUILT_IN_ATANH:
30272 case BUILT_IN_CBRT:
30273 case BUILT_IN_SINH:
30275 case BUILT_IN_ASINH:
30276 case BUILT_IN_ASIN:
30277 case BUILT_IN_COSH:
30279 case BUILT_IN_ACOSH:
30280 case BUILT_IN_ACOS:
30281 if (el_mode != DFmode || n != 2)
30285 case BUILT_IN_EXPF:
30286 case BUILT_IN_LOGF:
30287 case BUILT_IN_LOG10F:
30288 case BUILT_IN_POWF:
30289 case BUILT_IN_TANHF:
30290 case BUILT_IN_TANF:
30291 case BUILT_IN_ATANF:
30292 case BUILT_IN_ATAN2F:
30293 case BUILT_IN_ATANHF:
30294 case BUILT_IN_CBRTF:
30295 case BUILT_IN_SINHF:
30296 case BUILT_IN_SINF:
30297 case BUILT_IN_ASINHF:
30298 case BUILT_IN_ASINF:
30299 case BUILT_IN_COSHF:
30300 case BUILT_IN_COSF:
30301 case BUILT_IN_ACOSHF:
30302 case BUILT_IN_ACOSF:
30303 if (el_mode != SFmode || n != 4)
30311 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
30313 if (fn == BUILT_IN_LOGF)
30314 strcpy (name, "vmlsLn4");
30315 else if (fn == BUILT_IN_LOG)
30316 strcpy (name, "vmldLn2");
30319 sprintf (name, "vmls%s", bname+10);
30320 name[strlen (name)-1] = '4';
30323 sprintf (name, "vmld%s2", bname+10);
30325 /* Convert to uppercase. */
30329 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
30331 args = TREE_CHAIN (args))
30335 fntype = build_function_type_list (type_out, type_in, NULL);
30337 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
30339 /* Build a function declaration for the vectorized function. */
30340 new_fndecl = build_decl (BUILTINS_LOCATION,
30341 FUNCTION_DECL, get_identifier (name), fntype);
30342 TREE_PUBLIC (new_fndecl) = 1;
30343 DECL_EXTERNAL (new_fndecl) = 1;
30344 DECL_IS_NOVOPS (new_fndecl) = 1;
30345 TREE_READONLY (new_fndecl) = 1;
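/* Illustrative note (not part of the original source): under the
   mangling above, sinf vectorized over V4SF becomes "vmlsSin4" and
   log over V2DF becomes "vmldLn2". */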
30350 /* Handler for an ACML-style interface to
30351 a library with vectorized intrinsics. */
30354 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
30356 char name[20] = "__vr.._";
30357 tree fntype, new_fndecl, args;
30360 enum machine_mode el_mode, in_mode;
30363 /* The ACML library is 64-bit only and suitable for unsafe math only,
30364 as it does not correctly support parts of IEEE arithmetic with the
30365 required precision, such as denormals. */
30367 || !flag_unsafe_math_optimizations)
30370 el_mode = TYPE_MODE (TREE_TYPE (type_out));
30371 n = TYPE_VECTOR_SUBPARTS (type_out);
30372 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30373 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30374 if (el_mode != in_mode
30384 case BUILT_IN_LOG2:
30385 case BUILT_IN_LOG10:
30388 if (el_mode != DFmode
30393 case BUILT_IN_SINF:
30394 case BUILT_IN_COSF:
30395 case BUILT_IN_EXPF:
30396 case BUILT_IN_POWF:
30397 case BUILT_IN_LOGF:
30398 case BUILT_IN_LOG2F:
30399 case BUILT_IN_LOG10F:
30402 if (el_mode != SFmode
30411 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
30412 sprintf (name + 7, "%s", bname+10);
30415 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
30417 args = TREE_CHAIN (args))
30421 fntype = build_function_type_list (type_out, type_in, NULL);
30423 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
30425 /* Build a function declaration for the vectorized function. */
30426 new_fndecl = build_decl (BUILTINS_LOCATION,
30427 FUNCTION_DECL, get_identifier (name), fntype);
30428 TREE_PUBLIC (new_fndecl) = 1;
30429 DECL_EXTERNAL (new_fndecl) = 1;
30430 DECL_IS_NOVOPS (new_fndecl) = 1;
30431 TREE_READONLY (new_fndecl) = 1;
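/* Illustrative note (not part of the original source): with the
   "__vr.._" template filled in, sin vectorized over V2DF maps to
   "__vrd2_sin" and sinf over V4SF to "__vrs4_sinf", following the
   ACML vector math naming convention. */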
30436 /* Returns a decl of a function that implements gather load with
30437 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
30438 Return NULL_TREE if it is not available. */
30441 ix86_vectorize_builtin_gather (const_tree mem_vectype,
30442 const_tree index_type, int scale)
30445 enum ix86_builtins code;
30450 if ((TREE_CODE (index_type) != INTEGER_TYPE
30451 && !POINTER_TYPE_P (index_type))
30452 || (TYPE_MODE (index_type) != SImode
30453 && TYPE_MODE (index_type) != DImode))
30456 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
30459 /* The v*gather* insns sign-extend the index to pointer mode. */
30460 if (TYPE_PRECISION (index_type) < POINTER_SIZE
30461 && TYPE_UNSIGNED (index_type))
30466 || (scale & (scale - 1)) != 0)
30469 si = TYPE_MODE (index_type) == SImode;
30470 switch (TYPE_MODE (mem_vectype))
30473 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
30476 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
30479 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
30482 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
30485 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
30488 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
30491 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
30494 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
30500 return ix86_builtins[code];
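/* Illustrative note (not part of the original source): e.g. a V2DF
   gather load indexed by an SImode vector selects
   IX86_BUILTIN_GATHERSIV2DF above, which the vectorizer then
   invokes like any other target builtin. */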
30503 /* Returns a code for a target-specific builtin that implements
30504 the reciprocal of the function, or NULL_TREE if not available. */
30507 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
30508 bool sqrt ATTRIBUTE_UNUSED)
30510 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
30511 && flag_finite_math_only && !flag_trapping_math
30512 && flag_unsafe_math_optimizations))
30516 /* Machine dependent builtins. */
30519 /* Vectorized version of sqrt to rsqrt conversion. */
30520 case IX86_BUILTIN_SQRTPS_NR:
30521 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
30523 case IX86_BUILTIN_SQRTPS_NR256:
30524 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
30530 /* Normal builtins. */
30533 /* Sqrt to rsqrt conversion. */
30534 case BUILT_IN_SQRTF:
30535 return ix86_builtins[IX86_BUILTIN_RSQRTF];
30542 /* Helper for avx_vpermilps256_operand et al. This is also used by
30543 the expansion functions to turn the parallel back into a mask.
30544 The return value is 0 for no match, or imm8 + 1 for a match. */
30547 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
30549 unsigned i, nelt = GET_MODE_NUNITS (mode);
30551 unsigned char ipar[8];
30553 if (XVECLEN (par, 0) != (int) nelt)
30556 /* Validate that all of the elements are constants, and not totally
30557 out of range. Copy the data into an integral array to make the
30558 subsequent checks easier. */
30559 for (i = 0; i < nelt; ++i)
30561 rtx er = XVECEXP (par, 0, i);
30562 unsigned HOST_WIDE_INT ei;
30564 if (!CONST_INT_P (er))
30575 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
30577 for (i = 0; i < 2; ++i)
30581 mask |= ipar[i] << i;
30583 for (i = 2; i < 4; ++i)
30587 mask |= (ipar[i] - 2) << i;
30592 /* In the 256-bit SFmode case, we have full freedom of movement
30593 within the low 128-bit lane, but the high 128-bit lane must
30594 mirror the exact same pattern. */
30595 for (i = 0; i < 4; ++i)
30596 if (ipar[i] + 4 != ipar[i + 4])
30603 /* In the 128-bit case, we've full freedom in the placement of
30604 the elements from the source operand. */
30605 for (i = 0; i < nelt; ++i)
30606 mask |= ipar[i] << (i * (nelt / 2));
30610 gcc_unreachable ();
30613 /* Make sure success has a non-zero value by adding one. */
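/* Worked example (illustrative): for V4SFmode and the parallel
   [1 0 3 2], each element i contributes ipar[i] << (2 * i), so
   imm8 = 1 | (0 << 2) | (3 << 4) | (2 << 6) = 0xb1, and the function
   returns 0xb1 + 1 = 0xb2 so that a successful match is non-zero.  */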
30617 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
30618 the expansion functions to turn the parallel back into a mask.
30619 The return value is 0 for no match and the imm8+1 for a match. */
30622 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
30624 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
30626 unsigned char ipar[8];
30628 if (XVECLEN (par, 0) != (int) nelt)
30631 /* Validate that all of the elements are constants, and not totally
30632 out of range. Copy the data into an integral array to make the
30633 subsequent checks easier. */
30634 for (i = 0; i < nelt; ++i)
30636 rtx er = XVECEXP (par, 0, i);
30637 unsigned HOST_WIDE_INT ei;
30639 if (!CONST_INT_P (er))
30642 if (ei >= 2 * nelt)
30647 /* Validate that each half of the permute consists of consecutive elements, i.e. selects a whole half of one source. */
30648 for (i = 0; i < nelt2 - 1; ++i)
30649 if (ipar[i] + 1 != ipar[i + 1])
30651 for (i = nelt2; i < nelt - 1; ++i)
30652 if (ipar[i] + 1 != ipar[i + 1])
30655 /* Reconstruct the mask. */
30656 for (i = 0; i < 2; ++i)
30658 unsigned e = ipar[i * nelt2];
30662 mask |= e << (i * 4);
30665 /* Make sure success has a non-zero value by adding one. */
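/* Worked example (illustrative): for V4DFmode and the parallel
   [0 1 6 7], the low half selects elements 0-1 (e = 0, lane 0 of the
   first operand) and the high half selects elements 6-7 (e = 6,
   e / nelt2 = 3, lane 1 of the second operand), giving
   mask = 0 | (3 << 4) = 0x30 and a return value of 0x31.  */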
30669 /* Store OPERAND to the memory after reload is completed. This means
30670 that we can't easily use assign_stack_local. */
30672 ix86_force_to_memory (enum machine_mode mode, rtx operand)
30676 gcc_assert (reload_completed);
30677 if (ix86_using_red_zone ())
30679 result = gen_rtx_MEM (mode,
30680 gen_rtx_PLUS (Pmode,
30682 GEN_INT (-RED_ZONE_SIZE)));
30683 emit_move_insn (result, operand);
30685 else if (TARGET_64BIT)
30691 operand = gen_lowpart (DImode, operand);
30695 gen_rtx_SET (VOIDmode,
30696 gen_rtx_MEM (DImode,
30697 gen_rtx_PRE_DEC (DImode,
30698 stack_pointer_rtx)),
30702 gcc_unreachable ();
30704 result = gen_rtx_MEM (mode, stack_pointer_rtx);
30713 split_double_mode (mode, &operand, 1, operands, operands + 1);
30715 gen_rtx_SET (VOIDmode,
30716 gen_rtx_MEM (SImode,
30717 gen_rtx_PRE_DEC (Pmode,
30718 stack_pointer_rtx)),
30721 gen_rtx_SET (VOIDmode,
30722 gen_rtx_MEM (SImode,
30723 gen_rtx_PRE_DEC (Pmode,
30724 stack_pointer_rtx)),
30729 /* Store HImodes as SImodes. */
30730 operand = gen_lowpart (SImode, operand);
30734 gen_rtx_SET (VOIDmode,
30735 gen_rtx_MEM (GET_MODE (operand),
30736 gen_rtx_PRE_DEC (SImode,
30737 stack_pointer_rtx)),
30741 gcc_unreachable ();
30743 result = gen_rtx_MEM (mode, stack_pointer_rtx);
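/* Illustrative note: with the 64-bit red zone the operand is simply
   stored at sp - RED_ZONE_SIZE (e.g. movq %rax, -128(%rsp)) without
   touching the stack pointer; otherwise the value is pushed with
   pre-decrement and the returned MEM addresses the new top of
   stack.  */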
30748 /* Free the operand from memory. */
30750 ix86_free_from_memory (enum machine_mode mode)
30752 if (!ix86_using_red_zone ())
30756 if (mode == DImode || TARGET_64BIT)
30760 /* Use LEA to deallocate stack space. In peephole2 it will be converted
30761 to a pop or add instruction if registers are available. */
30762 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
30763 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
30768 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
30770 Put float CONST_DOUBLE in the constant pool instead of fp regs.
30771 QImode must go into class Q_REGS.
30772 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and movdf
30773 to do mem-to-mem moves through integer regs. */
30776 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
30778 enum machine_mode mode = GET_MODE (x);
30780 /* We're only allowed to return a subclass of CLASS. Many of the
30781 following checks fail for NO_REGS, so eliminate that early. */
30782 if (regclass == NO_REGS)
30785 /* All classes can load zeros. */
30786 if (x == CONST0_RTX (mode))
30789 /* Force constants into memory if we are loading a (nonzero) constant into
30790 an MMX or SSE register. This is because there are no MMX/SSE instructions
30791 to load from a constant. */
30793 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
30796 /* Prefer SSE regs only, if we can use them for math. */
30797 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
30798 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
30800 /* Floating-point constants need more complex checks. */
30801 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
30803 /* General regs can load everything. */
30804 if (reg_class_subset_p (regclass, GENERAL_REGS))
30807 /* Floats can load 0 and 1 plus some others. Note that we eliminated
30808 zero above. We only want to wind up preferring 80387 registers if
30809 we plan on doing computation with them. */
30811 && standard_80387_constant_p (x) > 0)
30813 /* Limit class to non-sse. */
30814 if (regclass == FLOAT_SSE_REGS)
30816 if (regclass == FP_TOP_SSE_REGS)
30818 if (regclass == FP_SECOND_SSE_REGS)
30819 return FP_SECOND_REG;
30820 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
30827 /* Generally when we see PLUS here, it's the function invariant
30828 (plus soft-fp const_int), which can only be computed into general regs. */
30830 if (GET_CODE (x) == PLUS)
30831 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
30833 /* QImode constants are easy to load, but non-constant QImode data
30834 must go into Q_REGS. */
30835 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
30837 if (reg_class_subset_p (regclass, Q_REGS))
30839 if (reg_class_subset_p (Q_REGS, regclass))
30847 /* Discourage putting floating-point values in SSE registers unless
30848 SSE math is being used, and likewise for the 387 registers. */
30850 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
30852 enum machine_mode mode = GET_MODE (x);
30854 /* Restrict the output reload class to the register bank that we are doing
30855 math on. If we would prefer not to return a subset of CLASS, reject this
30856 alternative: if reload cannot do this, it will still use its choice. */
30858 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
30859 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
30861 if (X87_FLOAT_MODE_P (mode))
30863 if (regclass == FP_TOP_SSE_REGS)
30865 else if (regclass == FP_SECOND_SSE_REGS)
30866 return FP_SECOND_REG;
30868 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
30875 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
30876 enum machine_mode mode, secondary_reload_info *sri)
30878 /* Double-word spills from general registers to non-offsettable memory
30879 references (zero-extended addresses) require special handling. */
30882 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
30883 && rclass == GENERAL_REGS
30884 && !offsettable_memref_p (x))
30887 ? CODE_FOR_reload_noff_load
30888 : CODE_FOR_reload_noff_store);
30889 /* Add the cost of moving address to a temporary. */
30890 sri->extra_cost = 1;
30895 /* QImode spills from non-QI registers require
30896 an intermediate register on 32-bit targets. */
30898 && !in_p && mode == QImode
30899 && (rclass == GENERAL_REGS
30900 || rclass == LEGACY_REGS
30901 || rclass == INDEX_REGS))
30910 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
30911 regno = true_regnum (x);
30913 /* Return Q_REGS if the operand is in memory. */
30918 /* This condition handles the corner case where an expression involving
30919 pointers gets vectorized. We're trying to use the address of a
30920 stack slot as a vector initializer.
30922 (set (reg:V2DI 74 [ vect_cst_.2 ])
30923 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
30925 Eventually frame gets turned into sp+offset like this:
30927 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30928 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
30929 (const_int 392 [0x188]))))
30931 That later gets turned into:
30933 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30934 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
30935 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
30937 We'll have the following reload recorded:
30939 Reload 0: reload_in (DI) =
30940 (plus:DI (reg/f:DI 7 sp)
30941 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
30942 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30943 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
30944 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
30945 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30946 reload_reg_rtx: (reg:V2DI 22 xmm1)
30948 That isn't going to work, since SSE instructions can't handle scalar
30949 additions. Returning GENERAL_REGS forces the addition into an integer
30950 register, and reload can handle subsequent reloads without problems. */
30952 if (in_p && GET_CODE (x) == PLUS
30953 && SSE_CLASS_P (rclass)
30954 && SCALAR_INT_MODE_P (mode))
30955 return GENERAL_REGS;
30960 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
30963 ix86_class_likely_spilled_p (reg_class_t rclass)
30974 case SSE_FIRST_REG:
30976 case FP_SECOND_REG:
30986 /* If we are copying between general and FP registers, we need a memory
30987 location. The same is true for SSE and MMX registers.
30989 To optimize register_move_cost performance, an inline variant is provided.
30991 The macro can't work reliably when one of the CLASSES is a class containing
30992 registers from multiple units (SSE, MMX, integer). We avoid this by never
30993 combining those units in a single alternative in the machine description.
30994 Ensure that this constraint holds to avoid unexpected surprises.
30996 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
30997 enforce these sanity checks. */
31000 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
31001 enum machine_mode mode, int strict)
31003 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
31004 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
31005 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
31006 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
31007 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
31008 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
31010 gcc_assert (!strict);
31014 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
31017 /* ??? This is a lie. We do have moves between mmx/general, and between
31018 mmx/sse2. But by saying we need secondary memory we discourage the
31019 register allocator from using the mmx registers unless needed. */
31020 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
31023 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
31025 /* SSE1 doesn't have any direct moves from other classes. */
31029 /* If the target says that inter-unit moves are more expensive
31030 than moving through memory, then don't generate them. */
31031 if (!TARGET_INTER_UNIT_MOVES)
31034 /* Between SSE and general, we have moves no larger than word size. */
31035 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
31043 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
31044 enum machine_mode mode, int strict)
31046 return inline_secondary_memory_needed (class1, class2, mode, strict);
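/* Example (illustrative): on a 32-bit target, a DImode move between
   SSE_REGS and GENERAL_REGS exceeds UNITS_PER_WORD, so the checks
   above request secondary memory: the value travels through a stack
   slot, e.g. movq %xmm0, (%esp) followed by two 32-bit integer
   loads.  */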
31049 /* Implement the TARGET_CLASS_MAX_NREGS hook.
31051 On the 80386, this is the size of MODE in words,
31052 except in the FP regs, where a single reg is always enough. */
31054 static unsigned char
31055 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
31057 if (MAYBE_INTEGER_CLASS_P (rclass))
31059 if (mode == XFmode)
31060 return (TARGET_64BIT ? 2 : 3);
31061 else if (mode == XCmode)
31062 return (TARGET_64BIT ? 4 : 6);
31064 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
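/* Example (illustrative): XFmode occupies 12 bytes on ia32 (3 words)
   but 16 bytes with 64-bit words (2 words), hence the hard-coded
   values above; a plain DImode value needs
   (8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD words.  */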
31068 if (COMPLEX_MODE_P (mode))
31075 /* Return true if the registers in CLASS cannot represent the change from
31076 modes FROM to TO. */
31079 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
31080 enum reg_class regclass)
31085 /* x87 registers can't do subreg at all, as all values are reformatted
31086 to extended precision. */
31087 if (MAYBE_FLOAT_CLASS_P (regclass))
31090 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
31092 /* Vector registers do not support QI or HImode loads. If we don't
31093 disallow a change to these modes, reload will assume it's ok to
31094 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
31095 the vec_dupv4hi pattern. */
31096 if (GET_MODE_SIZE (from) < 4)
31099 /* Vector registers do not support subreg with nonzero offsets, which
31100 are otherwise valid for integer registers. Since we can't see
31101 whether we have a nonzero offset from here, prohibit all
31102 nonparadoxical subregs changing size. */
31103 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
31110 /* Return the cost of moving data of mode M between a
31111 register and memory. A value of 2 is the default; this cost is
31112 relative to those in `REGISTER_MOVE_COST'.
31114 This function is used extensively by register_move_cost, which is used to
31115 build tables at startup, so make it inline in this case.
31116 When IN is 2, return maximum of in and out move cost.
31118 If moving between registers and memory is more expensive than
31119 between two registers, you should define this macro to express the relative cost.
31122 Also model the increased moving costs of QImode registers in non-Q_REGS classes. */
31126 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
31130 if (FLOAT_CLASS_P (regclass))
31148 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
31149 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
31151 if (SSE_CLASS_P (regclass))
31154 switch (GET_MODE_SIZE (mode))
31169 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
31170 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
31172 if (MMX_CLASS_P (regclass))
31175 switch (GET_MODE_SIZE (mode))
31187 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
31188 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
31190 switch (GET_MODE_SIZE (mode))
31193 if (Q_CLASS_P (regclass) || TARGET_64BIT)
31196 return ix86_cost->int_store[0];
31197 if (TARGET_PARTIAL_REG_DEPENDENCY
31198 && optimize_function_for_speed_p (cfun))
31199 cost = ix86_cost->movzbl_load;
31201 cost = ix86_cost->int_load[0];
31203 return MAX (cost, ix86_cost->int_store[0]);
31209 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
31211 return ix86_cost->movzbl_load;
31213 return ix86_cost->int_store[0] + 4;
31218 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
31219 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
31221 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
31222 if (mode == TFmode)
31225 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
31227 cost = ix86_cost->int_load[2];
31229 cost = ix86_cost->int_store[2];
31230 return (cost * (((int) GET_MODE_SIZE (mode)
31231 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
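/* Illustrative example: per the comment above, a TFmode value held in
   an integer class is moved as XFmode, so on a 32-bit target the
   int_load[2]/int_store[2] cost is scaled by 3, the number of 32-bit
   words in the 12-byte XFmode.  */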
31236 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
31239 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
31243 /* Return the cost of moving data from a register in class CLASS1 to
31244 one in class CLASS2.
31246 It is not required that the cost always equal 2 when FROM is the same as TO;
31247 on some machines it is expensive to move between registers if they are not
31248 general registers. */
31251 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
31252 reg_class_t class2_i)
31254 enum reg_class class1 = (enum reg_class) class1_i;
31255 enum reg_class class2 = (enum reg_class) class2_i;
31257 /* In case we require secondary memory, compute the cost of the store
31258 followed by the load. In order to avoid bad register allocation choices,
31259 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
31261 if (inline_secondary_memory_needed (class1, class2, mode, 0))
31265 cost += inline_memory_move_cost (mode, class1, 2);
31266 cost += inline_memory_move_cost (mode, class2, 2);
31268 /* In the case of copying from a general purpose register we may emit
31269 multiple stores followed by a single load, causing a memory-size-mismatch
31270 stall. Count this as an arbitrarily high cost of 20. */
31271 if (targetm.class_max_nregs (class1, mode)
31272 > targetm.class_max_nregs (class2, mode))
31275 /* In the case of FP/MMX moves, the registers actually overlap, and we
31276 have to switch modes in order to treat them differently. */
31277 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
31278 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
31284 /* Moves between SSE/MMX and integer unit are expensive. */
31285 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
31286 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
31288 /* ??? By keeping the returned value relatively high, we limit the number
31289 of moves between integer and MMX/SSE registers for all targets.
31290 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
31291 where integer modes in MMX/SSE registers are not tieable
31292 because of missing QImode and HImode moves to, from or between
31293 MMX/SSE registers. */
31294 return MAX (8, ix86_cost->mmxsse_to_integer);
31296 if (MAYBE_FLOAT_CLASS_P (class1))
31297 return ix86_cost->fp_move;
31298 if (MAYBE_SSE_CLASS_P (class1))
31299 return ix86_cost->sse_move;
31300 if (MAYBE_MMX_CLASS_P (class1))
31301 return ix86_cost->mmx_move;
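/* Example (illustrative): on a 32-bit target a DFmode copy from
   SSE_REGS to GENERAL_REGS needs secondary memory, so its cost is the
   sum of the two memory move costs computed above, plus 20 when the
   source class needs more hard registers than the destination.  */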
31305 /* Return TRUE if hard register REGNO can hold a value of machine-mode MODE. */
31309 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
31311 /* Only the flags register can hold CCmode values. */
31312 if (CC_REGNO_P (regno))
31313 return GET_MODE_CLASS (mode) == MODE_CC;
31314 if (GET_MODE_CLASS (mode) == MODE_CC
31315 || GET_MODE_CLASS (mode) == MODE_RANDOM
31316 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
31318 if (FP_REGNO_P (regno))
31319 return VALID_FP_MODE_P (mode);
31320 if (SSE_REGNO_P (regno))
31322 /* We implement the move patterns for all vector modes into and
31323 out of SSE registers, even when no operation instructions
31324 are available. OImode move is available only when AVX is
31326 return ((TARGET_AVX && mode == OImode)
31327 || VALID_AVX256_REG_MODE (mode)
31328 || VALID_SSE_REG_MODE (mode)
31329 || VALID_SSE2_REG_MODE (mode)
31330 || VALID_MMX_REG_MODE (mode)
31331 || VALID_MMX_REG_MODE_3DNOW (mode));
31333 if (MMX_REGNO_P (regno))
31335 /* We implement the move patterns for 3DNOW modes even in MMX mode,
31336 so if the register is available at all, then we can move data of
31337 the given mode into or out of it. */
31338 return (VALID_MMX_REG_MODE (mode)
31339 || VALID_MMX_REG_MODE_3DNOW (mode));
31342 if (mode == QImode)
31344 /* Take care for QImode values - they can be in non-QI regs,
31345 but then they do cause partial register stalls. */
31346 if (regno <= BX_REG || TARGET_64BIT)
31348 if (!TARGET_PARTIAL_REG_STALL)
31350 return !can_create_pseudo_p ();
31352 /* We handle both integers and floats in the general purpose registers. */
31353 else if (VALID_INT_MODE_P (mode))
31355 else if (VALID_FP_MODE_P (mode))
31357 else if (VALID_DFP_MODE_P (mode))
31359 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
31360 on to use that value in smaller contexts, this can easily force a
31361 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
31362 supporting DImode, allow it. */
31363 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
31369 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
31370 tieable integer mode. */
31373 ix86_tieable_integer_mode_p (enum machine_mode mode)
31382 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
31385 return TARGET_64BIT;
31392 /* Return true if MODE1 is accessible in a register that can hold MODE2
31393 without copying. That is, all register classes that can hold MODE2
31394 can also hold MODE1. */
31397 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
31399 if (mode1 == mode2)
31402 if (ix86_tieable_integer_mode_p (mode1)
31403 && ix86_tieable_integer_mode_p (mode2))
31406 /* MODE2 being XFmode implies fp stack or general regs, which means we
31407 can tie any smaller floating point modes to it. Note that we do not
31408 tie this with TFmode. */
31409 if (mode2 == XFmode)
31410 return mode1 == SFmode || mode1 == DFmode;
31412 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
31413 that we can tie it with SFmode. */
31414 if (mode2 == DFmode)
31415 return mode1 == SFmode;
31417 /* If MODE2 is only appropriate for an SSE register, then tie with
31418 any other mode acceptable to SSE registers. */
31419 if (GET_MODE_SIZE (mode2) == 16
31420 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
31421 return (GET_MODE_SIZE (mode1) == 16
31422 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
31424 /* If MODE2 is appropriate for an MMX register, then tie
31425 with any other mode acceptable to MMX registers. */
31426 if (GET_MODE_SIZE (mode2) == 8
31427 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
31428 return (GET_MODE_SIZE (mode1) == 8
31429 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
31434 /* Compute a (partial) cost for rtx X. Return true if the complete
31435 cost has been computed, and false if subexpressions should be
31436 scanned. In either case, *TOTAL contains the cost result. */
31439 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
31442 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
31443 enum machine_mode mode = GET_MODE (x);
31444 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
31452 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
31454 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
31456 else if (flag_pic && SYMBOLIC_CONST (x)
31458 || (GET_CODE (x) != LABEL_REF
31459 && (GET_CODE (x) != SYMBOL_REF
31460 || !SYMBOL_REF_LOCAL_P (x)))))
31467 if (mode == VOIDmode)
31470 switch (standard_80387_constant_p (x))
31475 default: /* Other constants */
31480 /* Start with (MEM (SYMBOL_REF)), since that's where
31481 it'll probably end up. Add a penalty for size. */
31482 *total = (COSTS_N_INSNS (1)
31483 + (flag_pic != 0 && !TARGET_64BIT)
31484 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
31490 /* Zero extension is often completely free on x86_64, so make
31491 it as cheap as possible. */
31492 if (TARGET_64BIT && mode == DImode
31493 && GET_MODE (XEXP (x, 0)) == SImode)
31495 else if (TARGET_ZERO_EXTEND_WITH_AND)
31496 *total = cost->add;
31498 *total = cost->movzx;
31502 *total = cost->movsx;
31506 if (CONST_INT_P (XEXP (x, 1))
31507 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
31509 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
31512 *total = cost->add;
31515 if ((value == 2 || value == 3)
31516 && cost->lea <= cost->shift_const)
31518 *total = cost->lea;
31528 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
31530 if (CONST_INT_P (XEXP (x, 1)))
31532 if (INTVAL (XEXP (x, 1)) > 32)
31533 *total = cost->shift_const + COSTS_N_INSNS (2);
31535 *total = cost->shift_const * 2;
31539 if (GET_CODE (XEXP (x, 1)) == AND)
31540 *total = cost->shift_var * 2;
31542 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
31547 if (CONST_INT_P (XEXP (x, 1)))
31548 *total = cost->shift_const;
31550 *total = cost->shift_var;
31558 gcc_assert (FLOAT_MODE_P (mode));
31559 gcc_assert (TARGET_FMA || TARGET_FMA4);
31561 /* ??? SSE scalar/vector cost should be used here. */
31562 /* ??? Bald assumption that fma has the same cost as fmul. */
31563 *total = cost->fmul;
31564 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
31566 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
31568 if (GET_CODE (sub) == NEG)
31569 sub = XEXP (sub, 0);
31570 *total += rtx_cost (sub, FMA, 0, speed);
31573 if (GET_CODE (sub) == NEG)
31574 sub = XEXP (sub, 0);
31575 *total += rtx_cost (sub, FMA, 2, speed);
31580 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31582 /* ??? SSE scalar cost should be used here. */
31583 *total = cost->fmul;
31586 else if (X87_FLOAT_MODE_P (mode))
31588 *total = cost->fmul;
31591 else if (FLOAT_MODE_P (mode))
31593 /* ??? SSE vector cost should be used here. */
31594 *total = cost->fmul;
31599 rtx op0 = XEXP (x, 0);
31600 rtx op1 = XEXP (x, 1);
31602 if (CONST_INT_P (XEXP (x, 1)))
31604 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
31605 for (nbits = 0; value != 0; value &= value - 1)
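/* Kernighan's trick: "value &= value - 1" clears the lowest set bit
   on each iteration, so nbits ends up as the population count of the
   multiplier, used as an estimate of the shift-and-add work.  */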
31609 /* This is arbitrary. */
31612 /* Compute costs correctly for widening multiplication. */
31613 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
31614 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
31615 == GET_MODE_SIZE (mode))
31617 int is_mulwiden = 0;
31618 enum machine_mode inner_mode = GET_MODE (op0);
31620 if (GET_CODE (op0) == GET_CODE (op1))
31621 is_mulwiden = 1, op1 = XEXP (op1, 0);
31622 else if (CONST_INT_P (op1))
31624 if (GET_CODE (op0) == SIGN_EXTEND)
31625 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
31628 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
31632 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
31635 *total = (cost->mult_init[MODE_INDEX (mode)]
31636 + nbits * cost->mult_bit
31637 + rtx_cost (op0, outer_code, opno, speed)
31638 + rtx_cost (op1, outer_code, opno, speed));
31647 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31648 /* ??? SSE cost should be used here. */
31649 *total = cost->fdiv;
31650 else if (X87_FLOAT_MODE_P (mode))
31651 *total = cost->fdiv;
31652 else if (FLOAT_MODE_P (mode))
31653 /* ??? SSE vector cost should be used here. */
31654 *total = cost->fdiv;
31656 *total = cost->divide[MODE_INDEX (mode)];
31660 if (GET_MODE_CLASS (mode) == MODE_INT
31661 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
31663 if (GET_CODE (XEXP (x, 0)) == PLUS
31664 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
31665 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
31666 && CONSTANT_P (XEXP (x, 1)))
31668 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
31669 if (val == 2 || val == 4 || val == 8)
31671 *total = cost->lea;
31672 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
31673 outer_code, opno, speed);
31674 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
31675 outer_code, opno, speed);
31676 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31680 else if (GET_CODE (XEXP (x, 0)) == MULT
31681 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
31683 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
31684 if (val == 2 || val == 4 || val == 8)
31686 *total = cost->lea;
31687 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
31688 outer_code, opno, speed);
31689 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31693 else if (GET_CODE (XEXP (x, 0)) == PLUS)
31695 *total = cost->lea;
31696 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
31697 outer_code, opno, speed);
31698 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
31699 outer_code, opno, speed);
31700 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31707 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31709 /* ??? SSE cost should be used here. */
31710 *total = cost->fadd;
31713 else if (X87_FLOAT_MODE_P (mode))
31715 *total = cost->fadd;
31718 else if (FLOAT_MODE_P (mode))
31720 /* ??? SSE vector cost should be used here. */
31721 *total = cost->fadd;
31729 if (!TARGET_64BIT && mode == DImode)
31731 *total = (cost->add * 2
31732 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
31733 << (GET_MODE (XEXP (x, 0)) != DImode))
31734 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
31735 << (GET_MODE (XEXP (x, 1)) != DImode)));
31741 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31743 /* ??? SSE cost should be used here. */
31744 *total = cost->fchs;
31747 else if (X87_FLOAT_MODE_P (mode))
31749 *total = cost->fchs;
31752 else if (FLOAT_MODE_P (mode))
31754 /* ??? SSE vector cost should be used here. */
31755 *total = cost->fchs;
31761 if (!TARGET_64BIT && mode == DImode)
31762 *total = cost->add * 2;
31764 *total = cost->add;
31768 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
31769 && XEXP (XEXP (x, 0), 1) == const1_rtx
31770 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
31771 && XEXP (x, 1) == const0_rtx)
31773 /* This kind of construct is implemented using test[bwl].
31774 Treat it as if we had an AND. */
31775 *total = (cost->add
31776 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
31777 + rtx_cost (const1_rtx, outer_code, opno, speed));
31783 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
31788 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31789 /* ??? SSE cost should be used here. */
31790 *total = cost->fabs;
31791 else if (X87_FLOAT_MODE_P (mode))
31792 *total = cost->fabs;
31793 else if (FLOAT_MODE_P (mode))
31794 /* ??? SSE vector cost should be used here. */
31795 *total = cost->fabs;
31799 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31800 /* ??? SSE cost should be used here. */
31801 *total = cost->fsqrt;
31802 else if (X87_FLOAT_MODE_P (mode))
31803 *total = cost->fsqrt;
31804 else if (FLOAT_MODE_P (mode))
31805 /* ??? SSE vector cost should be used here. */
31806 *total = cost->fsqrt;
31810 if (XINT (x, 1) == UNSPEC_TP)
31817 case VEC_DUPLICATE:
31818 /* ??? Assume all of these vector manipulation patterns are
31819 recognizable, in which case they all pretty much have the same cost. */
31821 *total = COSTS_N_INSNS (1);
31831 static int current_machopic_label_num;
31833 /* Given a symbol name and its associated stub, write out the
31834 definition of the stub. */
31837 machopic_output_stub (FILE *file, const char *symb, const char *stub)
31839 unsigned int length;
31840 char *binder_name, *symbol_name, lazy_ptr_name[32];
31841 int label = ++current_machopic_label_num;
31843 /* For 64-bit we shouldn't get here. */
31844 gcc_assert (!TARGET_64BIT);
31846 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
31847 symb = targetm.strip_name_encoding (symb);
31849 length = strlen (stub);
31850 binder_name = XALLOCAVEC (char, length + 32);
31851 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
31853 length = strlen (symb);
31854 symbol_name = XALLOCAVEC (char, length + 32);
31855 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
31857 sprintf (lazy_ptr_name, "L%d$lz", label);
31859 if (MACHOPIC_ATT_STUB)
31860 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
31861 else if (MACHOPIC_PURE)
31862 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
31864 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
31866 fprintf (file, "%s:\n", stub);
31867 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31869 if (MACHOPIC_ATT_STUB)
31871 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
31873 else if (MACHOPIC_PURE)
31876 /* 25-byte PIC stub using "CALL get_pc_thunk". */
31877 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
31878 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
31879 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
31880 label, lazy_ptr_name, label);
31881 fprintf (file, "\tjmp\t*%%ecx\n");
31884 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
31886 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
31887 it needs no stub-binding-helper. */
31888 if (MACHOPIC_ATT_STUB)
31891 fprintf (file, "%s:\n", binder_name);
31895 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
31896 fprintf (file, "\tpushl\t%%ecx\n");
31899 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
31901 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
31903 /* N.B. Keep the correspondence of these
31904 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
31905 old-pic/new-pic/non-pic stubs; altering this will break
31906 compatibility with existing dylibs. */
31909 /* 25-byte PIC stub using "CALL get_pc_thunk". */
31910 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
31913 /* 16-byte -mdynamic-no-pic stub. */
31914 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
31916 fprintf (file, "%s:\n", lazy_ptr_name);
31917 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31918 fprintf (file, ASM_LONG "%s\n", binder_name);
31920 #endif /* TARGET_MACHO */
31922 /* Order the registers for the register allocator. */
31925 x86_order_regs_for_local_alloc (void)
31930 /* First allocate the local general purpose registers. */
31931 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
31932 if (GENERAL_REGNO_P (i) && call_used_regs[i])
31933 reg_alloc_order [pos++] = i;
31935 /* Global general purpose registers. */
31936 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
31937 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
31938 reg_alloc_order [pos++] = i;
31940 /* x87 registers come first in case we are doing FP math using them. */
31942 if (!TARGET_SSE_MATH)
31943 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
31944 reg_alloc_order [pos++] = i;
31946 /* SSE registers. */
31947 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
31948 reg_alloc_order [pos++] = i;
31949 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
31950 reg_alloc_order [pos++] = i;
31952 /* x87 registers. */
31953 if (TARGET_SSE_MATH)
31954 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
31955 reg_alloc_order [pos++] = i;
31957 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
31958 reg_alloc_order [pos++] = i;
31960 /* Initialize the rest of the array, as we do not allocate some registers at all. */
31962 while (pos < FIRST_PSEUDO_REGISTER)
31963 reg_alloc_order [pos++] = 0;
31966 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
31967 in struct attribute_spec handler. */
31969 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
31971 int flags ATTRIBUTE_UNUSED,
31972 bool *no_add_attrs)
31974 if (TREE_CODE (*node) != FUNCTION_TYPE
31975 && TREE_CODE (*node) != METHOD_TYPE
31976 && TREE_CODE (*node) != FIELD_DECL
31977 && TREE_CODE (*node) != TYPE_DECL)
31979 warning (OPT_Wattributes, "%qE attribute only applies to functions",
31981 *no_add_attrs = true;
31986 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
31988 *no_add_attrs = true;
31991 if (is_attribute_p ("callee_pop_aggregate_return", name))
31995 cst = TREE_VALUE (args);
31996 if (TREE_CODE (cst) != INTEGER_CST)
31998 warning (OPT_Wattributes,
31999 "%qE attribute requires an integer constant argument",
32001 *no_add_attrs = true;
32003 else if (compare_tree_int (cst, 0) != 0
32004 && compare_tree_int (cst, 1) != 0)
32006 warning (OPT_Wattributes,
32007 "argument to %qE attribute is neither zero, nor one",
32009 *no_add_attrs = true;
32018 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
32019 struct attribute_spec.handler. */
32021 ix86_handle_abi_attribute (tree *node, tree name,
32022 tree args ATTRIBUTE_UNUSED,
32023 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32025 if (TREE_CODE (*node) != FUNCTION_TYPE
32026 && TREE_CODE (*node) != METHOD_TYPE
32027 && TREE_CODE (*node) != FIELD_DECL
32028 && TREE_CODE (*node) != TYPE_DECL)
32030 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32032 *no_add_attrs = true;
32036 /* Can combine regparm with all attributes but fastcall. */
32037 if (is_attribute_p ("ms_abi", name))
32039 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
32041 error ("ms_abi and sysv_abi attributes are not compatible");
32046 else if (is_attribute_p ("sysv_abi", name))
32048 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
32050 error ("ms_abi and sysv_abi attributes are not compatible");
32059 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32060 struct attribute_spec.handler. */
32062 ix86_handle_struct_attribute (tree *node, tree name,
32063 tree args ATTRIBUTE_UNUSED,
32064 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32067 if (DECL_P (*node))
32069 if (TREE_CODE (*node) == TYPE_DECL)
32070 type = &TREE_TYPE (*node);
32075 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
32077 warning (OPT_Wattributes, "%qE attribute ignored",
32079 *no_add_attrs = true;
32082 else if ((is_attribute_p ("ms_struct", name)
32083 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
32084 || ((is_attribute_p ("gcc_struct", name)
32085 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
32087 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
32089 *no_add_attrs = true;
32096 ix86_handle_fndecl_attribute (tree *node, tree name,
32097 tree args ATTRIBUTE_UNUSED,
32098 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32100 if (TREE_CODE (*node) != FUNCTION_DECL)
32102 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32104 *no_add_attrs = true;
32110 ix86_ms_bitfield_layout_p (const_tree record_type)
32112 return ((TARGET_MS_BITFIELD_LAYOUT
32113 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
32114 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
32117 /* Returns an expression indicating where the this parameter is
32118 located on entry to the FUNCTION. */
32121 x86_this_parameter (tree function)
32123 tree type = TREE_TYPE (function);
32124 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
32129 const int *parm_regs;
32131 if (ix86_function_type_abi (type) == MS_ABI)
32132 parm_regs = x86_64_ms_abi_int_parameter_registers;
32134 parm_regs = x86_64_int_parameter_registers;
32135 return gen_rtx_REG (DImode, parm_regs[aggr]);
32138 nregs = ix86_function_regparm (type, function);
32140 if (nregs > 0 && !stdarg_p (type))
32143 unsigned int ccvt = ix86_get_callcvt (type);
32145 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
32146 regno = aggr ? DX_REG : CX_REG;
32147 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
32151 return gen_rtx_MEM (SImode,
32152 plus_constant (stack_pointer_rtx, 4));
32161 return gen_rtx_MEM (SImode,
32162 plus_constant (stack_pointer_rtx, 4));
32165 return gen_rtx_REG (SImode, regno);
32168 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
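/* Illustrative summary: on ia32 with the default ABI "this" arrives at
   4(%esp) (8(%esp) when a hidden aggregate-return pointer is pushed
   first); with fastcall it arrives in %ecx (%edx for the aggregate
   case); on 64-bit it is in the first integer argument register of the
   selected ABI (%rdi for SysV, %rcx for MS), shifted to the second one
   when an aggregate-return pointer occupies the first.  */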
32171 /* Determine whether x86_output_mi_thunk can succeed. */
32174 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
32175 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
32176 HOST_WIDE_INT vcall_offset, const_tree function)
32178 /* 64-bit can handle anything. */
32182 /* For 32-bit, everything's fine if we have one free register. */
32183 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
32186 /* Need a free register for vcall_offset. */
32190 /* Need a free register for GOT references. */
32191 if (flag_pic && !targetm.binds_local_p (function))
32194 /* Otherwise ok. */
32198 /* Output the assembler code for a thunk function. THUNK_DECL is the
32199 declaration for the thunk function itself, FUNCTION is the decl for
32200 the target function. DELTA is an immediate constant offset to be
32201 added to THIS. If VCALL_OFFSET is nonzero, the word at
32202 *(*this + vcall_offset) should be added to THIS. */
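/* For instance (illustrative, assuming a register-passed "this" in
   %ecx on ia32), DELTA = 8 and VCALL_OFFSET = -16 produce roughly:

	addl	$8, %ecx
	movl	(%ecx), %eax
	addl	-16(%eax), %ecx
	jmp	function

   i.e. the fixed adjustment, a vtable pointer load, the vtable
   adjustment, and a tail jump to FUNCTION.  */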
32205 x86_output_mi_thunk (FILE *file,
32206 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
32207 HOST_WIDE_INT vcall_offset, tree function)
32209 rtx this_param = x86_this_parameter (function);
32210 rtx this_reg, tmp, fnaddr;
32211 unsigned int tmp_regno;
32214 tmp_regno = R10_REG;
32217 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
32218 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
32219 tmp_regno = AX_REG;
32221 tmp_regno = CX_REG;
32224 emit_note (NOTE_INSN_PROLOGUE_END);
32226 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
32227 pull it in now and let DELTA benefit. */
32228 if (REG_P (this_param))
32229 this_reg = this_param;
32230 else if (vcall_offset)
32232 /* Put the this parameter into %eax. */
32233 this_reg = gen_rtx_REG (Pmode, AX_REG);
32234 emit_move_insn (this_reg, this_param);
32237 this_reg = NULL_RTX;
32239 /* Adjust the this parameter by a fixed constant. */
32242 rtx delta_rtx = GEN_INT (delta);
32243 rtx delta_dst = this_reg ? this_reg : this_param;
32247 if (!x86_64_general_operand (delta_rtx, Pmode))
32249 tmp = gen_rtx_REG (Pmode, tmp_regno);
32250 emit_move_insn (tmp, delta_rtx);
32255 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
32258 /* Adjust the this parameter by a value stored in the vtable. */
32261 rtx vcall_addr, vcall_mem, this_mem;
32263 tmp = gen_rtx_REG (Pmode, tmp_regno);
32265 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
32266 if (Pmode != ptr_mode)
32267 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
32268 emit_move_insn (tmp, this_mem);
32270 /* Adjust the this parameter. */
32271 vcall_addr = plus_constant (tmp, vcall_offset);
32273 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
32275 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
32276 emit_move_insn (tmp2, GEN_INT (vcall_offset));
32277 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
32280 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
32281 if (Pmode != ptr_mode)
32282 emit_insn (gen_addsi_1_zext (this_reg,
32283 gen_rtx_REG (ptr_mode,
32287 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
32290 /* If necessary, drop THIS back to its stack slot. */
32291 if (this_reg && this_reg != this_param)
32292 emit_move_insn (this_param, this_reg);
32294 fnaddr = XEXP (DECL_RTL (function), 0);
32297 if (!flag_pic || targetm.binds_local_p (function)
32298 || cfun->machine->call_abi == MS_ABI)
32302 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
32303 tmp = gen_rtx_CONST (Pmode, tmp);
32304 fnaddr = gen_rtx_MEM (Pmode, tmp);
32309 if (!flag_pic || targetm.binds_local_p (function))
32312 else if (TARGET_MACHO)
32314 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
32315 fnaddr = XEXP (fnaddr, 0);
32317 #endif /* TARGET_MACHO */
32320 tmp = gen_rtx_REG (Pmode, CX_REG);
32321 output_set_got (tmp, NULL_RTX);
32323 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
32324 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
32325 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
32329 /* Our sibling call patterns do not allow memories, because we have no
32330 predicate that can distinguish between frame and non-frame memory.
32331 For our purposes here, we can get away with (ab)using a jump pattern,
32332 because we're going to do no optimization. */
32333 if (MEM_P (fnaddr))
32334 emit_jump_insn (gen_indirect_jump (fnaddr));
32337 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
32338 fnaddr = legitimize_pic_address (fnaddr,
32339 gen_rtx_REG (Pmode, tmp_regno));
32341 if (!sibcall_insn_operand (fnaddr, Pmode))
32343 tmp = gen_rtx_REG (Pmode, tmp_regno);
32344 if (GET_MODE (fnaddr) != Pmode)
32345 fnaddr = gen_rtx_ZERO_EXTEND (Pmode, fnaddr);
32346 emit_move_insn (tmp, fnaddr);
32350 tmp = gen_rtx_MEM (QImode, fnaddr);
32351 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
32352 tmp = emit_call_insn (tmp);
32353 SIBLING_CALL_P (tmp) = 1;
32357 /* Emit just enough of rest_of_compilation to get the insns emitted.
32358 Note that use_thunk calls assemble_start_function et al. */
32359 tmp = get_insns ();
32360 insn_locators_alloc ();
32361 shorten_branches (tmp);
32362 final_start_function (tmp, file, 1);
32363 final (tmp, file, 1);
32364 final_end_function ();
32368 x86_file_start (void)
32370 default_file_start ();
32372 darwin_file_start ();
32374 if (X86_FILE_START_VERSION_DIRECTIVE)
32375 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
32376 if (X86_FILE_START_FLTUSED)
32377 fputs ("\t.global\t__fltused\n", asm_out_file);
32378 if (ix86_asm_dialect == ASM_INTEL)
32379 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
32383 x86_field_alignment (tree field, int computed)
32385 enum machine_mode mode;
32386 tree type = TREE_TYPE (field);
32388 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
32390 mode = TYPE_MODE (strip_array_types (type));
32391 if (mode == DFmode || mode == DCmode
32392 || GET_MODE_CLASS (mode) == MODE_INT
32393 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
32394 return MIN (32, computed);
32398 /* Output assembler code to FILE to increment profiler label # LABELNO
32399 for profiling a function entry. */
32401 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
32403 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
32408 #ifndef NO_PROFILE_COUNTERS
32409 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
32412 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
32413 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
32415 fprintf (file, "\tcall\t%s\n", mcount_name);
32419 #ifndef NO_PROFILE_COUNTERS
32420 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
32423 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
32427 #ifndef NO_PROFILE_COUNTERS
32428 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
32431 fprintf (file, "\tcall\t%s\n", mcount_name);
32435 /* We don't have exact information about the insn sizes, but we may assume
32436 quite safely that we are informed about all 1 byte insns and memory
32437 address sizes. This is enough to eliminate unnecessary padding in 99% of cases. */
32441 min_insn_size (rtx insn)
32445 if (!INSN_P (insn) || !active_insn_p (insn))
32448 /* Discard the alignments we've emitted, and jump instructions. */
32449 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
32450 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
32452 if (JUMP_TABLE_DATA_P (insn))
32455 /* Important case - calls are always 5 bytes.
32456 It is common to have many calls in a row. */
32458 && symbolic_reference_mentioned_p (PATTERN (insn))
32459 && !SIBLING_CALL_P (insn))
32461 len = get_attr_length (insn);
32465 /* For normal instructions we rely on get_attr_length being exact,
32466 with a few exceptions. */
32467 if (!JUMP_P (insn))
32469 enum attr_type type = get_attr_type (insn);
32474 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
32475 || asm_noperands (PATTERN (insn)) >= 0)
32482 /* Otherwise trust get_attr_length. */
32486 l = get_attr_length_address (insn);
32487 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
32496 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
32498 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
32502 ix86_avoid_jump_mispredicts (void)
32504 rtx insn, start = get_insns ();
32505 int nbytes = 0, njumps = 0;
32508 /* Look for all minimal intervals of instructions containing 4 jumps.
32509 The intervals are bounded by START and INSN. NBYTES is the total
32510 size of instructions in the interval including INSN and not including
32511 START. When NBYTES is smaller than 16 bytes, it is possible
32512 that the ends of START and INSN fall in the same 16-byte page.
32514 The smallest offset in the page at which INSN can start is the case where
32515 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
32516 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
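   For example (illustrative): if INSN is a jump and three earlier
   jumps fall within NBYTES = 12 bytes of it, all four could share one
   16-byte window, so a pad of 15 - 12 + sizeof (INSN) bytes is
   emitted before INSN.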
32518 for (insn = start; insn; insn = NEXT_INSN (insn))
32522 if (LABEL_P (insn))
32524 int align = label_to_alignment (insn);
32525 int max_skip = label_to_max_skip (insn);
32529 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
32530 already in the current 16-byte page, because otherwise
32531 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
32532 bytes to reach a 16-byte boundary. */
32534 || (align <= 3 && max_skip != (1 << align) - 1))
32537 fprintf (dump_file, "Label %i with max_skip %i\n",
32538 INSN_UID (insn), max_skip);
32541 while (nbytes + max_skip >= 16)
32543 start = NEXT_INSN (start);
32544 if ((JUMP_P (start)
32545 && GET_CODE (PATTERN (start)) != ADDR_VEC
32546 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
32548 njumps--, isjump = 1;
32551 nbytes -= min_insn_size (start);
32557 min_size = min_insn_size (insn);
32558 nbytes += min_size;
32560 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
32561 INSN_UID (insn), min_size);
32563 && GET_CODE (PATTERN (insn)) != ADDR_VEC
32564 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
32572 start = NEXT_INSN (start);
32573 if ((JUMP_P (start)
32574 && GET_CODE (PATTERN (start)) != ADDR_VEC
32575 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
32577 njumps--, isjump = 1;
32580 nbytes -= min_insn_size (start);
32582 gcc_assert (njumps >= 0);
32584 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
32585 INSN_UID (start), INSN_UID (insn), nbytes);
32587 if (njumps == 3 && isjump && nbytes < 16)
32589 int padsize = 15 - nbytes + min_insn_size (insn);
32592 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
32593 INSN_UID (insn), padsize);
32594 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
32600 /* AMD Athlon works faster
32601 when RET is not the destination of a conditional jump or directly preceded
32602 by another jump instruction. We avoid the penalty by inserting a NOP just
32603 before RET instructions in such cases. */
32605 ix86_pad_returns (void)
32610 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
32612 basic_block bb = e->src;
32613 rtx ret = BB_END (bb);
32615 bool replace = false;
32617 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
32618 || optimize_bb_for_size_p (bb))
32620 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
32621 if (active_insn_p (prev) || LABEL_P (prev))
32623 if (prev && LABEL_P (prev))
32628 FOR_EACH_EDGE (e, ei, bb->preds)
32629 if (EDGE_FREQUENCY (e) && e->src->index >= 0
32630 && !(e->flags & EDGE_FALLTHRU))
32635 prev = prev_active_insn (ret);
32637 && ((JUMP_P (prev) && any_condjump_p (prev))
32640 /* Empty functions get a branch mispredict even when
32641 the jump destination is not visible to us. */
32642 if (!prev && !optimize_function_for_size_p (cfun))
32647 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
32653 /* Count the minimum number of instructions in BB. Return 4 if the
32654 number of instructions >= 4. */
32657 ix86_count_insn_bb (basic_block bb)
32660 int insn_count = 0;
32662 /* Count number of instructions in this block. Return 4 if the number
32663 of instructions >= 4. */
32664 FOR_BB_INSNS (bb, insn)
32666 /* This only happens in exit blocks. */
32668 && ANY_RETURN_P (PATTERN (insn)))
32671 if (NONDEBUG_INSN_P (insn)
32672 && GET_CODE (PATTERN (insn)) != USE
32673 && GET_CODE (PATTERN (insn)) != CLOBBER)
32676 if (insn_count >= 4)
32685 /* Count the minimum number of instructions in code path in BB.
32686 Return 4 if the number of instructions >= 4. */
32689 ix86_count_insn (basic_block bb)
32693 int min_prev_count;
32695 /* Only bother counting instructions along paths with no
32696 more than 2 basic blocks between entry and exit. Given
32697 that BB has an edge to exit, determine if a predecessor
32698 of BB has an edge from entry. If so, compute the number
32699 of instructions in the predecessor block. If there
32700 happen to be multiple such blocks, compute the minimum. */
32701 min_prev_count = 4;
32702 FOR_EACH_EDGE (e, ei, bb->preds)
32705 edge_iterator prev_ei;
32707 if (e->src == ENTRY_BLOCK_PTR)
32709 min_prev_count = 0;
32712 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
32714 if (prev_e->src == ENTRY_BLOCK_PTR)
32716 int count = ix86_count_insn_bb (e->src);
32717 if (count < min_prev_count)
32718 min_prev_count = count;
32724 if (min_prev_count < 4)
32725 min_prev_count += ix86_count_insn_bb (bb);
32727 return min_prev_count;
32730 /* Pad short functions to 4 instructions. */
32733 ix86_pad_short_function (void)
32738 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
32740 rtx ret = BB_END (e->src);
32741 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
32743 int insn_count = ix86_count_insn (e->src);
32745 /* Pad short function. */
32746 if (insn_count < 4)
32750 /* Find epilogue. */
32753 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
32754 insn = PREV_INSN (insn);
32759 /* Two NOPs count as one instruction. */
32760 insn_count = 2 * (4 - insn_count);
32761 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
32767 /* Implement machine specific optimizations. We implement padding of returns
32768 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
32772 /* We are freeing block_for_insn in the toplev to keep compatibility
32773 with old MDEP_REORGS that are not CFG based. Recompute it now. */
32774 compute_bb_for_insn ();
32776 /* Run the vzeroupper optimization if needed. */
32777 if (TARGET_VZEROUPPER)
32778 move_or_delete_vzeroupper ();
32780 if (optimize && optimize_function_for_speed_p (cfun))
32782 if (TARGET_PAD_SHORT_FUNCTION)
32783 ix86_pad_short_function ();
32784 else if (TARGET_PAD_RETURNS)
32785 ix86_pad_returns ();
32786 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
32787 if (TARGET_FOUR_JUMP_LIMIT)
32788 ix86_avoid_jump_mispredicts ();
32793 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
32796 x86_extended_QIreg_mentioned_p (rtx insn)
32799 extract_insn_cached (insn);
32800 for (i = 0; i < recog_data.n_operands; i++)
32801 if (REG_P (recog_data.operand[i])
32802 && REGNO (recog_data.operand[i]) > BX_REG)
32807 /* Return nonzero when P points to a register encoded via a REX prefix.
32808 Called via for_each_rtx. */
32810 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
32812 unsigned int regno;
32815 regno = REGNO (*p);
32816 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
32819 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
32822 x86_extended_reg_mentioned_p (rtx insn)
32824 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
32825 extended_reg_mentioned_1, NULL);
32828 /* If profitable, negate (without causing overflow) integer constant
32829 of mode MODE at location LOC. Return true in this case. */
32831 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
32835 if (!CONST_INT_P (*loc))
32841 /* DImode x86_64 constants must fit in 32 bits. */
32842 gcc_assert (x86_64_immediate_operand (*loc, mode));
32853 gcc_unreachable ();
32856 /* Avoid overflows. */
32857 if (mode_signbit_p (mode, *loc))
32860 val = INTVAL (*loc);
32862 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
32863 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
32864 if ((val < 0 && val != -128)
32867 *loc = GEN_INT (-val);
32874 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
32875 optabs would emit if we didn't have TFmode patterns. */
32878 x86_emit_floatuns (rtx operands[2])
32880 rtx neglab, donelab, i0, i1, f0, in, out;
32881 enum machine_mode mode, inmode;
32883 inmode = GET_MODE (operands[1]);
32884 gcc_assert (inmode == SImode || inmode == DImode);
32887 in = force_reg (inmode, operands[1]);
32888 mode = GET_MODE (out);
32889 neglab = gen_label_rtx ();
32890 donelab = gen_label_rtx ();
32891 f0 = gen_reg_rtx (mode);
32893 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
32895 expand_float (out, in, 0);
32897 emit_jump_insn (gen_jump (donelab));
32900 emit_label (neglab);
32902 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
32904 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
32906 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
32908 expand_float (f0, i0, 0);
32910 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
32912 emit_label (donelab);
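/* The negative path above uses the standard halving trick
   (illustrative note): for an input U with the sign bit set it
   converts (U >> 1) | (U & 1) as a signed value and doubles the
   result; OR-ing in the low bit keeps the final rounding correct.  */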
32915 /* AVX2 does support 32-byte integer vector operations,
32916 thus the longest vector we are faced with is V32QImode. */
32917 #define MAX_VECT_LEN 32
32919 struct expand_vec_perm_d
32921 rtx target, op0, op1;
32922 unsigned char perm[MAX_VECT_LEN];
32923 enum machine_mode vmode;
32924 unsigned char nelt;
32928 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
32929 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
32931 /* Get a vector mode of the same size as the original but with elements
32932 twice as wide. This is only guaranteed to apply to integral vectors. */
32934 static inline enum machine_mode
32935 get_mode_wider_vector (enum machine_mode o)
32937 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
32938 enum machine_mode n = GET_MODE_WIDER_MODE (o);
32939 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
32940 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
32944 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
32945 with all elements equal to VAR. Return true if successful. */
32948 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
32949 rtx target, rtx val)
32972 /* First attempt to recognize VAL as-is. */
32973 dup = gen_rtx_VEC_DUPLICATE (mode, val);
32974 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
32975 if (recog_memoized (insn) < 0)
32978 /* If that fails, force VAL into a register. */
32981 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
32982 seq = get_insns ();
32985 emit_insn_before (seq, insn);
32987 ok = recog_memoized (insn) >= 0;
32996 if (TARGET_SSE || TARGET_3DNOW_A)
33000 val = gen_lowpart (SImode, val);
33001 x = gen_rtx_TRUNCATE (HImode, val);
33002 x = gen_rtx_VEC_DUPLICATE (mode, x);
33003 emit_insn (gen_rtx_SET (VOIDmode, target, x));
33016 struct expand_vec_perm_d dperm;
33020 memset (&dperm, 0, sizeof (dperm));
33021 dperm.target = target;
33022 dperm.vmode = mode;
33023 dperm.nelt = GET_MODE_NUNITS (mode);
33024 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
33026 /* Extend to SImode using a paradoxical SUBREG. */
33027 tmp1 = gen_reg_rtx (SImode);
33028 emit_move_insn (tmp1, gen_lowpart (SImode, val));
33030 /* Insert the SImode value as low element of a V4SImode vector. */
33031 tmp2 = gen_lowpart (V4SImode, dperm.op0);
33032 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
33034 ok = (expand_vec_perm_1 (&dperm)
33035 || expand_vec_perm_broadcast_1 (&dperm));
33047 /* Replicate the value once into the next wider mode and recurse. */
33049 enum machine_mode smode, wsmode, wvmode;
33052 smode = GET_MODE_INNER (mode);
33053 wvmode = get_mode_wider_vector (mode);
33054 wsmode = GET_MODE_INNER (wvmode);
33056 val = convert_modes (wsmode, smode, val, true);
33057 x = expand_simple_binop (wsmode, ASHIFT, val,
33058 GEN_INT (GET_MODE_BITSIZE (smode)),
33059 NULL_RTX, 1, OPTAB_LIB_WIDEN);
33060 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
33062 x = gen_lowpart (wvmode, target);
33063 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
33071 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
33072 rtx x = gen_reg_rtx (hvmode);
33074 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
33077 x = gen_rtx_VEC_CONCAT (mode, x, x);
33078 emit_insn (gen_rtx_SET (VOIDmode, target, x));
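/* Illustrative sketch (ours, not part of GCC) of the widening step in
   the duplicate expander above, shown for a byte broadcast into a 32-bit
   word: each shift-and-IOR doubles the element width until a mode with a
   native broadcast is reached.  The example_ name is hypothetical. */

static unsigned int
example_broadcast_byte (unsigned char b)
{
  unsigned short h = (unsigned short) ((b << 8) | b);  /* QI -> HI */
  return ((unsigned int) h << 16) | h;                 /* HI -> SI */
}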
33087 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33088 whose ONE_VAR element is VAR, and the other elements are zero. Return true if successful. */
33092 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
33093 rtx target, rtx var, int one_var)
33095 enum machine_mode vsimode;
33098 bool use_vector_set = false;
33103 /* For SSE4.1, we normally use vector set. But if the second
33104 element is zero and inter-unit moves are OK, we use movq instead. */
33106 use_vector_set = (TARGET_64BIT
33108 && !(TARGET_INTER_UNIT_MOVES
33114 use_vector_set = TARGET_SSE4_1;
33117 use_vector_set = TARGET_SSE2;
33120 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
33127 use_vector_set = TARGET_AVX;
33130 /* Use ix86_expand_vector_set in 64bit mode only. */
33131 use_vector_set = TARGET_AVX && TARGET_64BIT;
33137 if (use_vector_set)
33139 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
33140 var = force_reg (GET_MODE_INNER (mode), var);
33141 ix86_expand_vector_set (mmx_ok, target, var, one_var);
33157 var = force_reg (GET_MODE_INNER (mode), var);
33158 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
33159 emit_insn (gen_rtx_SET (VOIDmode, target, x));
33164 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
33165 new_target = gen_reg_rtx (mode);
33167 new_target = target;
33168 var = force_reg (GET_MODE_INNER (mode), var);
33169 x = gen_rtx_VEC_DUPLICATE (mode, var);
33170 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
33171 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
33174 /* We need to shuffle the value to the correct position, so
33175 create a new pseudo to store the intermediate result. */
33177 /* With SSE2, we can use the integer shuffle insns. */
33178 if (mode != V4SFmode && TARGET_SSE2)
33180 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
33182 GEN_INT (one_var == 1 ? 0 : 1),
33183 GEN_INT (one_var == 2 ? 0 : 1),
33184 GEN_INT (one_var == 3 ? 0 : 1)));
33185 if (target != new_target)
33186 emit_move_insn (target, new_target);
33190 /* Otherwise convert the intermediate result to V4SFmode and
33191 use the SSE1 shuffle instructions. */
33192 if (mode != V4SFmode)
33194 tmp = gen_reg_rtx (V4SFmode);
33195 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
33200 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
33202 GEN_INT (one_var == 1 ? 0 : 1),
33203 GEN_INT (one_var == 2 ? 0+4 : 1+4),
33204 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
33206 if (mode != V4SFmode)
33207 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
33208 else if (tmp != target)
33209 emit_move_insn (target, tmp);
33211 else if (target != new_target)
33212 emit_move_insn (target, new_target);
33217 vsimode = V4SImode;
33223 vsimode = V2SImode;
33229 /* Zero extend the variable element to SImode and recurse. */
33230 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
33232 x = gen_reg_rtx (vsimode);
33233 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
33235 gcc_unreachable ();
33237 emit_move_insn (target, gen_lowpart (mode, x));
33245 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33246 consisting of the values in VALS. It is known that all elements
33247 except ONE_VAR are constants. Return true if successful. */
33250 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
33251 rtx target, rtx vals, int one_var)
33253 rtx var = XVECEXP (vals, 0, one_var);
33254 enum machine_mode wmode;
33257 const_vec = copy_rtx (vals);
33258 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
33259 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
33267 /* For the two element vectors, it's just as easy to use
33268 the general case. */
33272 /* Use ix86_expand_vector_set in 64bit mode only. */
33295 /* There's no way to set one QImode entry easily. Combine
33296 the variable value with its adjacent constant value, and
33297 promote to an HImode set. */
33298 x = XVECEXP (vals, 0, one_var ^ 1);
33301 var = convert_modes (HImode, QImode, var, true);
33302 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
33303 NULL_RTX, 1, OPTAB_LIB_WIDEN);
33304 x = GEN_INT (INTVAL (x) & 0xff);
33308 var = convert_modes (HImode, QImode, var, true);
33309 x = gen_int_mode (INTVAL (x) << 8, HImode);
33311 if (x != const0_rtx)
33312 var = expand_simple_binop (HImode, IOR, var, x, var,
33313 1, OPTAB_LIB_WIDEN);
33315 x = gen_reg_rtx (wmode);
33316 emit_move_insn (x, gen_lowpart (wmode, const_vec));
33317 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
33319 emit_move_insn (target, gen_lowpart (mode, x));
33326 emit_move_insn (target, const_vec);
33327 ix86_expand_vector_set (mmx_ok, target, var, one_var);
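/* Illustrative sketch (ours, not part of GCC) of the QImode pairing
   trick above: the variable byte and its constant neighbour are packed
   into one 16-bit value so a single HImode element set suffices.
   Little-endian lane order is assumed; the example_ name is
   hypothetical. */

static unsigned short
example_pack_byte_pair (unsigned char var, unsigned char neighbor,
                        int var_is_high_byte)
{
  return var_is_high_byte
         ? (unsigned short) ((var << 8) | neighbor)
         : (unsigned short) ((neighbor << 8) | var);
}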
33331 /* A subroutine of ix86_expand_vector_init_general. Use vector
33332 concatenate to handle the most general case: all values variable,
33333 and none identical. */
33336 ix86_expand_vector_init_concat (enum machine_mode mode,
33337 rtx target, rtx *ops, int n)
33339 enum machine_mode cmode, hmode = VOIDmode;
33340 rtx first[8], second[4];
33380 gcc_unreachable ();
33383 if (!register_operand (ops[1], cmode))
33384 ops[1] = force_reg (cmode, ops[1]);
33385 if (!register_operand (ops[0], cmode))
33386 ops[0] = force_reg (cmode, ops[0]);
33387 emit_insn (gen_rtx_SET (VOIDmode, target,
33388 gen_rtx_VEC_CONCAT (mode, ops[0],
33408 gcc_unreachable ();
33424 gcc_unreachable ();
33429 /* FIXME: We process inputs backward to help RA. PR 36222. */
33432 for (; i > 0; i -= 2, j--)
33434 first[j] = gen_reg_rtx (cmode);
33435 v = gen_rtvec (2, ops[i - 1], ops[i]);
33436 ix86_expand_vector_init (false, first[j],
33437 gen_rtx_PARALLEL (cmode, v));
33443 gcc_assert (hmode != VOIDmode);
33444 for (i = j = 0; i < n; i += 2, j++)
33446 second[j] = gen_reg_rtx (hmode);
33447 ix86_expand_vector_init_concat (hmode, second [j],
33451 ix86_expand_vector_init_concat (mode, target, second, n);
33454 ix86_expand_vector_init_concat (mode, target, first, n);
33458 gcc_unreachable ();
33462 /* A subroutine of ix86_expand_vector_init_general. Use vector
33463 interleave to handle the most general case: all values variable,
33464 and none identical. */
33467 ix86_expand_vector_init_interleave (enum machine_mode mode,
33468 rtx target, rtx *ops, int n)
33470 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
33473 rtx (*gen_load_even) (rtx, rtx, rtx);
33474 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
33475 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
33480 gen_load_even = gen_vec_setv8hi;
33481 gen_interleave_first_low = gen_vec_interleave_lowv4si;
33482 gen_interleave_second_low = gen_vec_interleave_lowv2di;
33483 inner_mode = HImode;
33484 first_imode = V4SImode;
33485 second_imode = V2DImode;
33486 third_imode = VOIDmode;
33489 gen_load_even = gen_vec_setv16qi;
33490 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
33491 gen_interleave_second_low = gen_vec_interleave_lowv4si;
33492 inner_mode = QImode;
33493 first_imode = V8HImode;
33494 second_imode = V4SImode;
33495 third_imode = V2DImode;
33498 gcc_unreachable ();
33501 for (i = 0; i < n; i++)
33503 /* Extend the odd element to SImode using a paradoxical SUBREG. */
33504 op0 = gen_reg_rtx (SImode);
33505 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
33507 /* Insert the SImode value as low element of V4SImode vector. */
33508 op1 = gen_reg_rtx (V4SImode);
33509 op0 = gen_rtx_VEC_MERGE (V4SImode,
33510 gen_rtx_VEC_DUPLICATE (V4SImode,
33512 CONST0_RTX (V4SImode),
33514 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
33516 /* Cast the V4SImode vector back to a vector in the original mode. */
33517 op0 = gen_reg_rtx (mode);
33518 emit_move_insn (op0, gen_lowpart (mode, op1));
33520 /* Load the even elements into the second position. */
33521 emit_insn (gen_load_even (op0,
33522 force_reg (inner_mode,
33526 /* Cast vector to FIRST_IMODE vector. */
33527 ops[i] = gen_reg_rtx (first_imode);
33528 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
33531 /* Interleave low FIRST_IMODE vectors. */
33532 for (i = j = 0; i < n; i += 2, j++)
33534 op0 = gen_reg_rtx (first_imode);
33535 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
33537 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
33538 ops[j] = gen_reg_rtx (second_imode);
33539 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
33542 /* Interleave low SECOND_IMODE vectors. */
33543 switch (second_imode)
33546 for (i = j = 0; i < n / 2; i += 2, j++)
33548 op0 = gen_reg_rtx (second_imode);
33549 emit_insn (gen_interleave_second_low (op0, ops[i],
33552 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
33554 ops[j] = gen_reg_rtx (third_imode);
33555 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
33557 second_imode = V2DImode;
33558 gen_interleave_second_low = gen_vec_interleave_lowv2di;
33562 op0 = gen_reg_rtx (second_imode);
33563 emit_insn (gen_interleave_second_low (op0, ops[0],
33566 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
33568 emit_insn (gen_rtx_SET (VOIDmode, target,
33569 gen_lowpart (mode, op0)));
33573 gcc_unreachable ();
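/* Illustrative sketch (ours, not part of GCC) of one "interleave low"
   step used above, on plain arrays: the low halves of two vectors are
   merged so even lanes come from A and odd lanes from B.  The example_
   name is hypothetical. */

static void
example_interleave_low (const unsigned short *a, const unsigned short *b,
                        unsigned short *out, int half_nelt)
{
  int i;
  for (i = 0; i < half_nelt; i++)
    {
      out[2 * i] = a[i];      /* even lanes from the first operand */
      out[2 * i + 1] = b[i];  /* odd lanes from the second operand */
    }
}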
33577 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
33578 all values variable, and none identical. */
33581 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
33582 rtx target, rtx vals)
33584 rtx ops[32], op0, op1;
33585 enum machine_mode half_mode = VOIDmode;
33592 if (!mmx_ok && !TARGET_SSE)
33604 n = GET_MODE_NUNITS (mode);
33605 for (i = 0; i < n; i++)
33606 ops[i] = XVECEXP (vals, 0, i);
33607 ix86_expand_vector_init_concat (mode, target, ops, n);
33611 half_mode = V16QImode;
33615 half_mode = V8HImode;
33619 n = GET_MODE_NUNITS (mode);
33620 for (i = 0; i < n; i++)
33621 ops[i] = XVECEXP (vals, 0, i);
33622 op0 = gen_reg_rtx (half_mode);
33623 op1 = gen_reg_rtx (half_mode);
33624 ix86_expand_vector_init_interleave (half_mode, op0, ops,
33626 ix86_expand_vector_init_interleave (half_mode, op1,
33627 &ops [n >> 1], n >> 2);
33628 emit_insn (gen_rtx_SET (VOIDmode, target,
33629 gen_rtx_VEC_CONCAT (mode, op0, op1)));
33633 if (!TARGET_SSE4_1)
33641 /* Don't use ix86_expand_vector_init_interleave if we can't
33642 move from GPR to SSE register directly. */
33643 if (!TARGET_INTER_UNIT_MOVES)
33646 n = GET_MODE_NUNITS (mode);
33647 for (i = 0; i < n; i++)
33648 ops[i] = XVECEXP (vals, 0, i);
33649 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
33657 gcc_unreachable ();
33661 int i, j, n_elts, n_words, n_elt_per_word;
33662 enum machine_mode inner_mode;
33663 rtx words[4], shift;
33665 inner_mode = GET_MODE_INNER (mode);
33666 n_elts = GET_MODE_NUNITS (mode);
33667 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
33668 n_elt_per_word = n_elts / n_words;
33669 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
33671 for (i = 0; i < n_words; ++i)
33673 rtx word = NULL_RTX;
33675 for (j = 0; j < n_elt_per_word; ++j)
33677 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
33678 elt = convert_modes (word_mode, inner_mode, elt, true);
33684 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
33685 word, 1, OPTAB_LIB_WIDEN);
33686 word = expand_simple_binop (word_mode, IOR, word, elt,
33687 word, 1, OPTAB_LIB_WIDEN);
33695 emit_move_insn (target, gen_lowpart (mode, words[0]));
33696 else if (n_words == 2)
33698 rtx tmp = gen_reg_rtx (mode);
33699 emit_clobber (tmp);
33700 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
33701 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
33702 emit_move_insn (target, tmp);
33704 else if (n_words == 4)
33706 rtx tmp = gen_reg_rtx (V4SImode);
33707 gcc_assert (word_mode == SImode);
33708 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
33709 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
33710 emit_move_insn (target, gen_lowpart (mode, tmp));
33713 gcc_unreachable ();
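/* Illustrative sketch (ours, not part of GCC) of the word-packing loop
   above, building one 32-bit word from four byte elements.  The element
   with the highest index is loaded first so that element 0 ends up in
   the low bits, matching little-endian lane order.  The example_ name is
   hypothetical. */

static unsigned int
example_pack_word (const unsigned char elt[4])
{
  unsigned int word = elt[3];
  int j;
  for (j = 2; j >= 0; j--)
    word = (word << 8) | elt[j];  /* ASHIFT then IOR, as above */
  return word;
}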
33717 /* Initialize vector TARGET via VALS. Suppress the use of MMX
33718 instructions unless MMX_OK is true. */
33721 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
33723 enum machine_mode mode = GET_MODE (target);
33724 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33725 int n_elts = GET_MODE_NUNITS (mode);
33726 int n_var = 0, one_var = -1;
33727 bool all_same = true, all_const_zero = true;
33731 for (i = 0; i < n_elts; ++i)
33733 x = XVECEXP (vals, 0, i);
33734 if (!(CONST_INT_P (x)
33735 || GET_CODE (x) == CONST_DOUBLE
33736 || GET_CODE (x) == CONST_FIXED))
33737 n_var++, one_var = i;
33738 else if (x != CONST0_RTX (inner_mode))
33739 all_const_zero = false;
33740 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
33744 /* Constants are best loaded from the constant pool. */
33747 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
33751 /* If all values are identical, broadcast the value. */
33753 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
33754 XVECEXP (vals, 0, 0)))
33757 /* Values where only one field is non-constant are best loaded from
33758 the pool and overwritten via move later. */
33762 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
33763 XVECEXP (vals, 0, one_var),
33767 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
33771 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
33775 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
33777 enum machine_mode mode = GET_MODE (target);
33778 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33779 enum machine_mode half_mode;
33780 bool use_vec_merge = false;
33782 static rtx (*gen_extract[6][2]) (rtx, rtx)
33784 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
33785 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
33786 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
33787 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
33788 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
33789 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
33791 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
33793 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
33794 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
33795 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
33796 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
33797 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
33798 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
33808 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
33809 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
33811 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
33813 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
33814 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33820 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
33824 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
33825 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
33827 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
33829 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
33830 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33837 /* For the two element vectors, we implement a VEC_CONCAT with
33838 the extraction of the other element. */
33840 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
33841 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
33844 op0 = val, op1 = tmp;
33846 op0 = tmp, op1 = val;
33848 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
33849 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33854 use_vec_merge = TARGET_SSE4_1;
33861 use_vec_merge = true;
33865 /* tmp = target = A B C D */
33866 tmp = copy_to_reg (target);
33867 /* target = A A B B */
33868 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
33869 /* target = X A B B */
33870 ix86_expand_vector_set (false, target, val, 0);
33871 /* target = A X C D */
33872 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33873 const1_rtx, const0_rtx,
33874 GEN_INT (2+4), GEN_INT (3+4)));
33878 /* tmp = target = A B C D */
33879 tmp = copy_to_reg (target);
33880 /* tmp = X B C D */
33881 ix86_expand_vector_set (false, tmp, val, 0);
33882 /* target = A B X D */
33883 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33884 const0_rtx, const1_rtx,
33885 GEN_INT (0+4), GEN_INT (3+4)));
33889 /* tmp = target = A B C D */
33890 tmp = copy_to_reg (target);
33891 /* tmp = X B C D */
33892 ix86_expand_vector_set (false, tmp, val, 0);
33893 /* target = A B C X */
33894 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33895 const0_rtx, const1_rtx,
33896 GEN_INT (2+4), GEN_INT (0+4)));
33900 gcc_unreachable ();
33905 use_vec_merge = TARGET_SSE4_1;
33909 /* Element 0 handled by vec_merge below. */
33912 use_vec_merge = true;
33918 /* With SSE2, use integer shuffles to swap element 0 and ELT,
33919 store into element 0, then shuffle them back. */
33923 order[0] = GEN_INT (elt);
33924 order[1] = const1_rtx;
33925 order[2] = const2_rtx;
33926 order[3] = GEN_INT (3);
33927 order[elt] = const0_rtx;
33929 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
33930 order[1], order[2], order[3]));
33932 ix86_expand_vector_set (false, target, val, 0);
33934 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
33935 order[1], order[2], order[3]));
33939 /* For SSE1, we have to reuse the V4SF code. */
33940 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
33941 gen_lowpart (SFmode, val), elt);
33946 use_vec_merge = TARGET_SSE2;
33949 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
33953 use_vec_merge = TARGET_SSE4_1;
33960 half_mode = V16QImode;
33966 half_mode = V8HImode;
33972 half_mode = V4SImode;
33978 half_mode = V2DImode;
33984 half_mode = V4SFmode;
33990 half_mode = V2DFmode;
33996 /* Compute offset. */
34000 gcc_assert (i <= 1);
34002 /* Extract the half. */
34003 tmp = gen_reg_rtx (half_mode);
34004 emit_insn (gen_extract[j][i] (tmp, target));
34006 /* Put val in tmp at elt. */
34007 ix86_expand_vector_set (false, tmp, val, elt);
34010 emit_insn (gen_insert[j][i] (target, target, tmp));
34019 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
34020 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
34021 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
34025 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
34027 emit_move_insn (mem, target);
34029 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
34030 emit_move_insn (tmp, val);
34032 emit_move_insn (target, mem);
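/* Illustrative sketch (ours, not part of GCC) of the stack-temporary
   fallback above: spill the whole vector, overwrite one element in
   memory, and reload.  Shown for a four-float vector; the example_ name
   is hypothetical. */

static void
example_set_elt_via_memory (float vec[4], float val, int elt)
{
  float mem[4];
  int i;
  for (i = 0; i < 4; i++)  /* emit_move_insn (mem, target) */
    mem[i] = vec[i];
  mem[elt] = val;          /* store VAL at the adjusted address */
  for (i = 0; i < 4; i++)  /* emit_move_insn (target, mem) */
    vec[i] = mem[i];
}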
34037 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
34039 enum machine_mode mode = GET_MODE (vec);
34040 enum machine_mode inner_mode = GET_MODE_INNER (mode);
34041 bool use_vec_extr = false;
34054 use_vec_extr = true;
34058 use_vec_extr = TARGET_SSE4_1;
34070 tmp = gen_reg_rtx (mode);
34071 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
34072 GEN_INT (elt), GEN_INT (elt),
34073 GEN_INT (elt+4), GEN_INT (elt+4)));
34077 tmp = gen_reg_rtx (mode);
34078 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
34082 gcc_unreachable ();
34085 use_vec_extr = true;
34090 use_vec_extr = TARGET_SSE4_1;
34104 tmp = gen_reg_rtx (mode);
34105 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
34106 GEN_INT (elt), GEN_INT (elt),
34107 GEN_INT (elt), GEN_INT (elt)));
34111 tmp = gen_reg_rtx (mode);
34112 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
34116 gcc_unreachable ();
34119 use_vec_extr = true;
34124 /* For SSE1, we have to reuse the V4SF code. */
34125 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
34126 gen_lowpart (V4SFmode, vec), elt);
34132 use_vec_extr = TARGET_SSE2;
34135 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
34139 use_vec_extr = TARGET_SSE4_1;
34145 tmp = gen_reg_rtx (V4SFmode);
34147 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
34149 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
34150 ix86_expand_vector_extract (false, target, tmp, elt & 3);
34158 tmp = gen_reg_rtx (V2DFmode);
34160 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
34162 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
34163 ix86_expand_vector_extract (false, target, tmp, elt & 1);
34171 tmp = gen_reg_rtx (V16QImode);
34173 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
34175 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
34176 ix86_expand_vector_extract (false, target, tmp, elt & 15);
34184 tmp = gen_reg_rtx (V8HImode);
34186 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
34188 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
34189 ix86_expand_vector_extract (false, target, tmp, elt & 7);
34197 tmp = gen_reg_rtx (V4SImode);
34199 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
34201 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
34202 ix86_expand_vector_extract (false, target, tmp, elt & 3);
34210 tmp = gen_reg_rtx (V2DImode);
34212 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
34214 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
34215 ix86_expand_vector_extract (false, target, tmp, elt & 1);
34221 /* ??? Could extract the appropriate HImode element and shift. */
34228 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
34229 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
34231 /* Let the rtl optimizers know about the zero extension performed. */
34232 if (inner_mode == QImode || inner_mode == HImode)
34234 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
34235 target = gen_lowpart (SImode, target);
34238 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
34242 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
34244 emit_move_insn (mem, vec);
34246 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
34247 emit_move_insn (target, tmp);
34251 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
34252 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
34253 The upper bits of DEST are undefined, though they shouldn't cause
34254 exceptions (some bits from src or all zeros are ok). */
34257 emit_reduc_half (rtx dest, rtx src, int i)
34260 switch (GET_MODE (src))
34264 tem = gen_sse_movhlps (dest, src, src);
34266 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
34267 GEN_INT (1 + 4), GEN_INT (1 + 4));
34270 tem = gen_vec_interleave_highv2df (dest, src, src);
34276 tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
34277 gen_lowpart (V1TImode, src),
34282 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
34284 tem = gen_avx_shufps256 (dest, src, src,
34285 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
34289 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
34291 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
34298 tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
34299 gen_lowpart (V4DImode, src),
34300 gen_lowpart (V4DImode, src),
34303 tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
34304 gen_lowpart (V2TImode, src),
34308 gcc_unreachable ();
34313 /* Expand a vector reduction. FN is the binary pattern to reduce;
34314 DEST is the destination; IN is the input vector. */
34317 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
34319 rtx half, dst, vec = in;
34320 enum machine_mode mode = GET_MODE (in);
34323 /* SSE4.1 has a special instruction, phminposuw, for V8HImode UMIN reduction. */
34325 && mode == V8HImode
34326 && fn == gen_uminv8hi3)
34328 emit_insn (gen_sse4_1_phminposuw (dest, in));
34332 for (i = GET_MODE_BITSIZE (mode);
34333 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
34336 half = gen_reg_rtx (mode);
34337 emit_reduc_half (half, vec, i);
34338 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
34341 dst = gen_reg_rtx (mode);
34342 emit_insn (fn (dst, half, vec));
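/* Illustrative sketch (ours, not part of GCC) of the halving reduction
   above, on a plain array with addition as the binary operation: each
   pass folds the upper half onto the lower half, so lane 0 holds the
   result after log2(nelt) steps.  The example_ name is hypothetical. */

static float
example_reduce_plus (float v[8])
{
  int half, i;
  for (half = 4; half >= 1; half /= 2)  /* 8 -> 4 -> 2 -> 1 lanes */
    for (i = 0; i < half; i++)
      v[i] = v[i] + v[i + half];        /* fn (dst, half, vec) */
  return v[0];
}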
34347 /* Target hook for scalar_mode_supported_p. */
34349 ix86_scalar_mode_supported_p (enum machine_mode mode)
34351 if (DECIMAL_FLOAT_MODE_P (mode))
34352 return default_decimal_float_supported_p ();
34353 else if (mode == TFmode)
34356 return default_scalar_mode_supported_p (mode);
34359 /* Implements target hook vector_mode_supported_p. */
34361 ix86_vector_mode_supported_p (enum machine_mode mode)
34363 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
34365 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
34367 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
34369 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
34371 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
34376 /* Target hook for c_mode_for_suffix. */
34377 static enum machine_mode
34378 ix86_c_mode_for_suffix (char suffix)
34388 /* Worker function for TARGET_MD_ASM_CLOBBERS.
34390 We do this in the new i386 backend to maintain source compatibility
34391 with the old cc0-based compiler. */
34394 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
34395 tree inputs ATTRIBUTE_UNUSED,
34398 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
34400 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
34405 /* Implements the target hook targetm.asm.encode_section_info. */
34407 static void ATTRIBUTE_UNUSED
34408 ix86_encode_section_info (tree decl, rtx rtl, int first)
34410 default_encode_section_info (decl, rtl, first);
34412 if (TREE_CODE (decl) == VAR_DECL
34413 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
34414 && ix86_in_large_data_p (decl))
34415 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
34418 /* Worker function for REVERSE_CONDITION. */
34421 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
34423 return (mode != CCFPmode && mode != CCFPUmode
34424 ? reverse_condition (code)
34425 : reverse_condition_maybe_unordered (code));
34428 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
34432 output_387_reg_move (rtx insn, rtx *operands)
34434 if (REG_P (operands[0]))
34436 if (REG_P (operands[1])
34437 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
34439 if (REGNO (operands[0]) == FIRST_STACK_REG)
34440 return output_387_ffreep (operands, 0);
34441 return "fstp\t%y0";
34443 if (STACK_TOP_P (operands[0]))
34444 return "fld%Z1\t%y1";
34447 else if (MEM_P (operands[0]))
34449 gcc_assert (REG_P (operands[1]));
34450 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
34451 return "fstp%Z0\t%y0";
34454 /* There is no non-popping store to memory for XFmode.
34455 So if we need one, follow the store with a load. */
34456 if (GET_MODE (operands[0]) == XFmode)
34457 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
34459 return "fst%Z0\t%y0";
34466 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
34467 the FP status register is set. */
34470 ix86_emit_fp_unordered_jump (rtx label)
34472 rtx reg = gen_reg_rtx (HImode);
34475 emit_insn (gen_x86_fnstsw_1 (reg));
34477 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
34479 emit_insn (gen_x86_sahf_1 (reg));
34481 temp = gen_rtx_REG (CCmode, FLAGS_REG);
34482 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
34486 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
34488 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
34489 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
34492 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
34493 gen_rtx_LABEL_REF (VOIDmode, label),
34495 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
34497 emit_jump_insn (temp);
34498 predict_jump (REG_BR_PROB_BASE * 10 / 100);
34501 /* Output code to perform a log1p XFmode calculation. */
34503 void ix86_emit_i387_log1p (rtx op0, rtx op1)
34505 rtx label1 = gen_label_rtx ();
34506 rtx label2 = gen_label_rtx ();
34508 rtx tmp = gen_reg_rtx (XFmode);
34509 rtx tmp2 = gen_reg_rtx (XFmode);
34512 emit_insn (gen_absxf2 (tmp, op1));
34513 test = gen_rtx_GE (VOIDmode, tmp,
34514 CONST_DOUBLE_FROM_REAL_VALUE (
34515 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
34517 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
34519 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
34520 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
34521 emit_jump (label2);
34523 emit_label (label1);
34524 emit_move_insn (tmp, CONST1_RTX (XFmode));
34525 emit_insn (gen_addxf3 (tmp, op1, tmp));
34526 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
34527 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
34529 emit_label (label2);
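/* Illustrative scalar sketch (ours, not part of GCC) of the branch
   above: fyl2xp1 is only valid for |x| < 1 - sqrt(2)/2 ~= 0.2928, so
   larger inputs take the explicit 1 + x path through fyl2x.  The
   function pointers model the two i387 insns with the Y operand (fldln2)
   folded out as a multiplication; the example_ name is hypothetical. */

static double
example_log1p (double x, double (*log2_of) (double),
               double (*log2p1_of) (double))
{
  const double ln2 = 0.69314718055994530942;  /* fldln2 */
  double ax = x < 0 ? -x : x;

  if (ax >= 0.29289321881345247561810596348408353)
    return ln2 * log2_of (1.0 + x);  /* fyl2x path: log(1 + x) directly */
  return ln2 * log2p1_of (x);        /* fyl2xp1 path: accurate near zero */
}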
34532 /* Emit x87 code for a round() calculation. */
34533 void ix86_emit_i387_round (rtx op0, rtx op1)
34535 enum machine_mode inmode = GET_MODE (op1);
34536 enum machine_mode outmode = GET_MODE (op0);
34537 rtx e1, e2, res, tmp, tmp1, half;
34538 rtx scratch = gen_reg_rtx (HImode);
34539 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
34540 rtx jump_label = gen_label_rtx ();
34542 rtx (*gen_abs) (rtx, rtx);
34543 rtx (*gen_neg) (rtx, rtx);
34548 gen_abs = gen_abssf2;
34551 gen_abs = gen_absdf2;
34554 gen_abs = gen_absxf2;
34557 gcc_unreachable ();
34563 gen_neg = gen_negsf2;
34566 gen_neg = gen_negdf2;
34569 gen_neg = gen_negxf2;
34572 gen_neg = gen_neghi2;
34575 gen_neg = gen_negsi2;
34578 gen_neg = gen_negdi2;
34581 gcc_unreachable ();
34584 e1 = gen_reg_rtx (inmode);
34585 e2 = gen_reg_rtx (inmode);
34586 res = gen_reg_rtx (outmode);
34588 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
34590 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
34592 /* scratch = fxam(op1) */
34593 emit_insn (gen_rtx_SET (VOIDmode, scratch,
34594 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
34596 /* e1 = fabs(op1) */
34597 emit_insn (gen_abs (e1, op1));
34599 /* e2 = e1 + 0.5 */
34600 half = force_reg (inmode, half);
34601 emit_insn (gen_rtx_SET (VOIDmode, e2,
34602 gen_rtx_PLUS (inmode, e1, half)));
34604 /* res = floor(e2) */
34605 if (inmode != XFmode)
34607 tmp1 = gen_reg_rtx (XFmode);
34609 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
34610 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
34620 rtx tmp0 = gen_reg_rtx (XFmode);
34622 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
34624 emit_insn (gen_rtx_SET (VOIDmode, res,
34625 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
34626 UNSPEC_TRUNC_NOOP)));
34630 emit_insn (gen_frndintxf2_floor (res, tmp1));
34633 emit_insn (gen_lfloorxfhi2 (res, tmp1));
34636 emit_insn (gen_lfloorxfsi2 (res, tmp1));
34639 emit_insn (gen_lfloorxfdi2 (res, tmp1));
34642 gcc_unreachable ();
34645 /* flags = signbit(a) */
34646 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
34648 /* if (flags) then res = -res */
34649 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
34650 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
34651 gen_rtx_LABEL_REF (VOIDmode, jump_label),
34653 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
34654 predict_jump (REG_BR_PROB_BASE * 50 / 100);
34655 JUMP_LABEL (insn) = jump_label;
34657 emit_insn (gen_neg (res, res));
34659 emit_label (jump_label);
34660 LABEL_NUSES (jump_label) = 1;
34662 emit_move_insn (op0, res);
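/* Illustrative scalar sketch (ours, not part of GCC) of the identity
   round(a) = sgn(a) * floor(fabs(a) + 0.5) expanded above; floor_of
   stands in for the frndint/lfloor insns and the example_ name is
   hypothetical (the real code also preserves -0.0 via fxam). */

static double
example_round (double a, double (*floor_of) (double))
{
  double e1 = a < 0 ? -a : a;        /* e1 = fabs (a) */
  double res = floor_of (e1 + 0.5);  /* res = floor (e1 + 0.5) */
  return a < 0 ? -res : res;         /* if (signbit) res = -res */
}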
34665 /* Output code to perform a Newton-Raphson approximation of a single precision
34666 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
34668 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
34670 rtx x0, x1, e0, e1;
34672 x0 = gen_reg_rtx (mode);
34673 e0 = gen_reg_rtx (mode);
34674 e1 = gen_reg_rtx (mode);
34675 x1 = gen_reg_rtx (mode);
34677 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
34679 b = force_reg (mode, b);
34681 /* x0 = rcp(b) estimate */
34682 emit_insn (gen_rtx_SET (VOIDmode, x0,
34683 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
34686 emit_insn (gen_rtx_SET (VOIDmode, e0,
34687 gen_rtx_MULT (mode, x0, b)));
34690 emit_insn (gen_rtx_SET (VOIDmode, e0,
34691 gen_rtx_MULT (mode, x0, e0)));
34694 emit_insn (gen_rtx_SET (VOIDmode, e1,
34695 gen_rtx_PLUS (mode, x0, x0)));
34698 emit_insn (gen_rtx_SET (VOIDmode, x1,
34699 gen_rtx_MINUS (mode, e1, e0)));
34702 emit_insn (gen_rtx_SET (VOIDmode, res,
34703 gen_rtx_MULT (mode, a, x1)));
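/* Illustrative scalar sketch (ours, not part of GCC) of the refinement
   above: one Newton-Raphson step on the hardware reciprocal estimate.
   rcp_estimate stands in for rcpss/rcpps and the example_ name is
   hypothetical. */

static float
example_swdiv (float a, float b, float (*rcp_estimate) (float))
{
  float x0 = rcp_estimate (b);  /* ~12-bit accurate 1/b */
  float e0 = b * x0 * x0;       /* b * rcp(b) * rcp(b) */
  float e1 = x0 + x0;           /* rcp(b) + rcp(b) */
  return a * (e1 - e0);         /* a / b, refined to ~24 bits */
}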
34706 /* Output code to perform a Newton-Raphson approximation of a
34707 single precision floating point [reciprocal] square root. */
34709 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
34712 rtx x0, e0, e1, e2, e3, mthree, mhalf;
34715 x0 = gen_reg_rtx (mode);
34716 e0 = gen_reg_rtx (mode);
34717 e1 = gen_reg_rtx (mode);
34718 e2 = gen_reg_rtx (mode);
34719 e3 = gen_reg_rtx (mode);
34721 real_from_integer (&r, VOIDmode, -3, -1, 0);
34722 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
34724 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
34725 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
34727 if (VECTOR_MODE_P (mode))
34729 mthree = ix86_build_const_vector (mode, true, mthree);
34730 mhalf = ix86_build_const_vector (mode, true, mhalf);
34733 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
34734 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
34736 a = force_reg (mode, a);
34738 /* x0 = rsqrt(a) estimate */
34739 emit_insn (gen_rtx_SET (VOIDmode, x0,
34740 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
34743 /* If a == 0.0, filter out the infinite rsqrt estimate so that sqrt(0.0) does not become 0 * inf = NaN. */
34748 zero = gen_reg_rtx (mode);
34749 mask = gen_reg_rtx (mode);
34751 zero = force_reg (mode, CONST0_RTX(mode));
34752 emit_insn (gen_rtx_SET (VOIDmode, mask,
34753 gen_rtx_NE (mode, zero, a)));
34755 emit_insn (gen_rtx_SET (VOIDmode, x0,
34756 gen_rtx_AND (mode, x0, mask)));
34760 emit_insn (gen_rtx_SET (VOIDmode, e0,
34761 gen_rtx_MULT (mode, x0, a)));
34763 emit_insn (gen_rtx_SET (VOIDmode, e1,
34764 gen_rtx_MULT (mode, e0, x0)));
34767 mthree = force_reg (mode, mthree);
34768 emit_insn (gen_rtx_SET (VOIDmode, e2,
34769 gen_rtx_PLUS (mode, e1, mthree)));
34771 mhalf = force_reg (mode, mhalf);
34773 /* e3 = -.5 * x0 */
34774 emit_insn (gen_rtx_SET (VOIDmode, e3,
34775 gen_rtx_MULT (mode, x0, mhalf)));
34777 /* e3 = -.5 * e0 */
34778 emit_insn (gen_rtx_SET (VOIDmode, e3,
34779 gen_rtx_MULT (mode, e0, mhalf)));
34780 /* ret = e2 * e3 */
34781 emit_insn (gen_rtx_SET (VOIDmode, res,
34782 gen_rtx_MULT (mode, e2, e3)));
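/* Illustrative scalar sketch (ours, not part of GCC) of the step above:
   one Newton-Raphson iteration on the hardware rsqrt estimate, with the
   extra factor of A folded in for the sqrt variant.  rsqrt_estimate
   stands in for rsqrtss/rsqrtps and the example_ name is hypothetical. */

static float
example_swsqrt (float a, int recip, float (*rsqrt_estimate) (float))
{
  float x0 = rsqrt_estimate (a);   /* ~12-bit accurate 1/sqrt(a) */
  float e0 = recip ? x0 : a * x0;  /* sqrt(a) = a * rsqrt(a) */
  float e2 = a * x0 * x0 - 3.0f;   /* e2 = e1 + mthree above */
  return -0.5f * e0 * e2;          /* res = e2 * e3 above */
}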
34785 #ifdef TARGET_SOLARIS
34786 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
34789 i386_solaris_elf_named_section (const char *name, unsigned int flags,
34792 /* With Binutils 2.15, the "@unwind" marker must be specified on
34793 every occurrence of the ".eh_frame" section, not just the first one. */
34796 if (TARGET_64BIT && strcmp (name, ".eh_frame") == 0)
34798 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
34799 flags & SECTION_WRITE ? "aw" : "a");
34804 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
34806 solaris_elf_asm_comdat_section (name, flags, decl);
34811 default_elf_asm_named_section (name, flags, decl);
34813 #endif /* TARGET_SOLARIS */
34815 /* Return the mangling of TYPE if it is an extended fundamental type. */
34817 static const char *
34818 ix86_mangle_type (const_tree type)
34820 type = TYPE_MAIN_VARIANT (type);
34822 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
34823 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
34826 switch (TYPE_MODE (type))
34829 /* __float128 is "g". */
34832 /* "long double" or __float80 is "e". */
34839 /* For 32-bit code we can save PIC register setup by using
34840 the __stack_chk_fail_local hidden function instead of calling
34841 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
34842 register, so it is better to call __stack_chk_fail directly. */
34844 static tree ATTRIBUTE_UNUSED
34845 ix86_stack_protect_fail (void)
34847 return TARGET_64BIT
34848 ? default_external_stack_protect_fail ()
34849 : default_hidden_stack_protect_fail ();
34852 /* Select a format to encode pointers in exception handling data. CODE
34853 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
34854 true if the symbol may be affected by dynamic relocations.
34856 ??? All x86 object file formats are capable of representing this.
34857 After all, the relocation needed is the same as for the call insn.
34858 Whether or not a particular assembler allows us to enter such, I
34859 guess we'll have to see. */
34861 asm_preferred_eh_data_format (int code, int global)
34865 int type = DW_EH_PE_sdata8;
34867 || ix86_cmodel == CM_SMALL_PIC
34868 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
34869 type = DW_EH_PE_sdata4;
34870 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
34872 if (ix86_cmodel == CM_SMALL
34873 || (ix86_cmodel == CM_MEDIUM && code))
34874 return DW_EH_PE_udata4;
34875 return DW_EH_PE_absptr;
34878 /* Expand copysign from SIGN to the positive value ABS_VALUE
34879 storing it in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
34882 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
34884 enum machine_mode mode = GET_MODE (sign);
34885 rtx sgn = gen_reg_rtx (mode);
34886 if (mask == NULL_RTX)
34888 enum machine_mode vmode;
34890 if (mode == SFmode)
34892 else if (mode == DFmode)
34897 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
34898 if (!VECTOR_MODE_P (mode))
34900 /* We need to generate a scalar mode mask in this case. */
34901 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
34902 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
34903 mask = gen_reg_rtx (mode);
34904 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
34908 mask = gen_rtx_NOT (mode, mask);
34909 emit_insn (gen_rtx_SET (VOIDmode, sgn,
34910 gen_rtx_AND (mode, mask, sign)));
34911 emit_insn (gen_rtx_SET (VOIDmode, result,
34912 gen_rtx_IOR (mode, abs_value, sgn)));
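/* Illustrative scalar sketch (ours, not part of GCC) of the mask logic
   above, on the bit pattern of a float: only the sign bit of SIGN is
   kept and ORed into the known-positive value.  Assumes a 32-bit float
   and unsigned int; the example_ name is hypothetical. */

static float
example_copysign_to_positive (float abs_value, float sign)
{
  union { float f; unsigned int u; } a, s;
  a.f = abs_value;
  s.f = sign;
  a.u |= s.u & 0x80000000u;  /* sgn = mask & sign; result = abs | sgn */
  return a.f;
}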
34915 /* Expand fabs (OP0) and return a new rtx that holds the result. The
34916 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
34919 ix86_expand_sse_fabs (rtx op0, rtx *smask)
34921 enum machine_mode vmode, mode = GET_MODE (op0);
34924 xa = gen_reg_rtx (mode);
34925 if (mode == SFmode)
34927 else if (mode == DFmode)
34931 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
34932 if (!VECTOR_MODE_P (mode))
34934 /* We need to generate a scalar mode mask in this case. */
34935 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
34936 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
34937 mask = gen_reg_rtx (mode);
34938 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
34940 emit_insn (gen_rtx_SET (VOIDmode, xa,
34941 gen_rtx_AND (mode, op0, mask)));
34949 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
34950 swapping the operands if SWAP_OPERANDS is true. The expanded
34951 code is a forward jump to a newly created label in case the
34952 comparison is true. The generated label rtx is returned. */
34954 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
34955 bool swap_operands)
34966 label = gen_label_rtx ();
34967 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
34968 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34969 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
34970 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
34971 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
34972 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
34973 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
34974 JUMP_LABEL (tmp) = label;
34979 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
34980 using comparison code CODE. Operands are swapped for the comparison if
34981 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
34983 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
34984 bool swap_operands)
34986 rtx (*insn)(rtx, rtx, rtx, rtx);
34987 enum machine_mode mode = GET_MODE (op0);
34988 rtx mask = gen_reg_rtx (mode);
34997 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
34999 emit_insn (insn (mask, op0, op1,
35000 gen_rtx_fmt_ee (code, mode, op0, op1)));
35004 /* Generate and return a rtx of mode MODE for 2**n where n is the number
35005 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
35007 ix86_gen_TWO52 (enum machine_mode mode)
35009 REAL_VALUE_TYPE TWO52r;
35012 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
35013 TWO52 = const_double_from_real_value (TWO52r, mode);
35014 TWO52 = force_reg (mode, TWO52);
35019 /* Expand SSE sequence for computing lround from OP1 storing into OP0. */
35022 ix86_expand_lround (rtx op0, rtx op1)
35024 /* C code for the stuff we're doing below:
35025 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
35028 enum machine_mode mode = GET_MODE (op1);
35029 const struct real_format *fmt;
35030 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35033 /* load nextafter (0.5, 0.0) */
35034 fmt = REAL_MODE_FORMAT (mode);
35035 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35036 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35038 /* adj = copysign (0.5, op1) */
35039 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
35040 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
35042 /* adj = op1 + adj */
35043 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
35045 /* op0 = (imode)adj */
35046 expand_fix (op0, adj, 0);
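/* Illustrative scalar sketch (ours, not part of GCC) of the constant
   loaded above: pred_half = nextafter (0.5, 0.0) = 0.5 - 2**(-p-1) for a
   p-bit mantissa.  Adding it instead of exactly 0.5 keeps inputs just
   below one half from rounding up to 1.  Assumes IEEE double (p = 53);
   the example_ name is hypothetical. */

static double
example_pred_half (void)
{
  double half_minus_pred_half = 1.0;
  int i;
  for (i = 0; i < 54; i++)            /* 2**(-p-1) with p = 53 */
    half_minus_pred_half *= 0.5;
  return 0.5 - half_minus_pred_half;  /* largest double below 0.5 */
}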
35049 /* Expand SSE2 sequence for computing lfloor or lceil (per DO_FLOOR) from OP1, storing into OP0. */
35052 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
35054 /* C code for the stuff we're doing below (for do_floor):
35056 xi -= (double)xi > op1 ? 1 : 0;
35059 enum machine_mode fmode = GET_MODE (op1);
35060 enum machine_mode imode = GET_MODE (op0);
35061 rtx ireg, freg, label, tmp;
35063 /* reg = (long)op1 */
35064 ireg = gen_reg_rtx (imode);
35065 expand_fix (ireg, op1, 0);
35067 /* freg = (double)reg */
35068 freg = gen_reg_rtx (fmode);
35069 expand_float (freg, ireg, 0);
35071 /* ireg = (freg > op1) ? ireg - 1 : ireg */
35072 label = ix86_expand_sse_compare_and_jump (UNLE,
35073 freg, op1, !do_floor);
35074 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
35075 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
35076 emit_move_insn (ireg, tmp);
35078 emit_label (label);
35079 LABEL_NUSES (label) = 1;
35081 emit_move_insn (op0, ireg);
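/* Illustrative scalar sketch (ours, not part of GCC) of the compensation
   above for the floor case: the conversion truncates toward zero, so the
   result is one too large exactly when converting back overshoots the
   input, i.e. for negative non-integers.  The example_ name is
   hypothetical. */

static long
example_lfloor (double x)
{
  long ireg = (long) x;         /* reg = (long) op1 */
  double freg = (double) ireg;  /* freg = (double) reg */
  if (freg > x)                 /* ireg = (freg > op1) ? ireg - 1 : ireg */
    ireg -= 1;
  return ireg;
}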
35084 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
35085 result in OPERAND0. */
35087 ix86_expand_rint (rtx operand0, rtx operand1)
35089 /* C code for the stuff we're doing below:
35090 xa = fabs (operand1);
35091 if (!isless (xa, 2**52))
35093 xa = xa + 2**52 - 2**52;
35094 return copysign (xa, operand1);
35096 enum machine_mode mode = GET_MODE (operand0);
35097 rtx res, xa, label, TWO52, mask;
35099 res = gen_reg_rtx (mode);
35100 emit_move_insn (res, operand1);
35102 /* xa = abs (operand1) */
35103 xa = ix86_expand_sse_fabs (res, &mask);
35105 /* if (!isless (xa, TWO52)) goto label; */
35106 TWO52 = ix86_gen_TWO52 (mode);
35107 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35109 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35110 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
35112 ix86_sse_copysign_to_positive (res, xa, res, mask);
35114 emit_label (label);
35115 LABEL_NUSES (label) = 1;
35117 emit_move_insn (operand0, res);
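/* Illustrative scalar sketch (ours, not part of GCC) of the TWO52 trick
   above: for |x| < 2**52, adding and then subtracting 2**52 leaves
   exactly the nearest integer, because doubles at that magnitude have no
   fraction bits.  The sign test is a simplification of the sign-mask
   copy (it ignores -0.0); the example_ name is hypothetical. */

static double
example_rint (double x)
{
  const double two52 = 4503599627370496.0;  /* 2**52 */
  double xa = x < 0 ? -x : x;               /* xa = fabs (x) */
  if (!(xa < two52))                        /* !isless (xa, TWO52) */
    return x;                               /* already integral */
  xa = xa + two52 - two52;                  /* round to integer */
  return x < 0 ? -xa : xa;                  /* copysign (xa, x) */
}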
35120 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1, storing into OPERAND0. */
35123 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
35125 /* C code for the stuff we expand below.
35126 double xa = fabs (x), x2;
35127 if (!isless (xa, TWO52))
35129 xa = xa + TWO52 - TWO52;
35130 x2 = copysign (xa, x);
35139 enum machine_mode mode = GET_MODE (operand0);
35140 rtx xa, TWO52, tmp, label, one, res, mask;
35142 TWO52 = ix86_gen_TWO52 (mode);
35144 /* Temporary for holding the result, initialized to the input
35145 operand to ease control flow. */
35146 res = gen_reg_rtx (mode);
35147 emit_move_insn (res, operand1);
35149 /* xa = abs (operand1) */
35150 xa = ix86_expand_sse_fabs (res, &mask);
35152 /* if (!isless (xa, TWO52)) goto label; */
35153 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35155 /* xa = xa + TWO52 - TWO52; */
35156 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35157 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
35159 /* xa = copysign (xa, operand1) */
35160 ix86_sse_copysign_to_positive (xa, xa, res, mask);
35162 /* generate 1.0 or -1.0 */
35163 one = force_reg (mode,
35164 const_double_from_real_value (do_floor
35165 ? dconst1 : dconstm1, mode));
35167 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35168 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
35169 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35170 gen_rtx_AND (mode, one, tmp)));
35171 /* We always need to subtract here to preserve signed zero. */
35172 tmp = expand_simple_binop (mode, MINUS,
35173 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35174 emit_move_insn (res, tmp);
35176 emit_label (label);
35177 LABEL_NUSES (label) = 1;
35179 emit_move_insn (operand0, res);
35182 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1, storing into OPERAND0. */
35185 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
35187 /* C code for the stuff we expand below.
35188 double xa = fabs (x), x2;
35189 if (!isless (xa, TWO52))
35191 x2 = (double)(long)x;
35198 if (HONOR_SIGNED_ZEROS (mode))
35199 return copysign (x2, x);
35202 enum machine_mode mode = GET_MODE (operand0);
35203 rtx xa, xi, TWO52, tmp, label, one, res, mask;
35205 TWO52 = ix86_gen_TWO52 (mode);
35207 /* Temporary for holding the result, initialized to the input
35208 operand to ease control flow. */
35209 res = gen_reg_rtx (mode);
35210 emit_move_insn (res, operand1);
35212 /* xa = abs (operand1) */
35213 xa = ix86_expand_sse_fabs (res, &mask);
35215 /* if (!isless (xa, TWO52)) goto label; */
35216 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35218 /* xa = (double)(long)x */
35219 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35220 expand_fix (xi, res, 0);
35221 expand_float (xa, xi, 0);
35224 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
35226 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35227 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
35228 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35229 gen_rtx_AND (mode, one, tmp)));
35230 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
35231 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35232 emit_move_insn (res, tmp);
35234 if (HONOR_SIGNED_ZEROS (mode))
35235 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
35237 emit_label (label);
35238 LABEL_NUSES (label) = 1;
35240 emit_move_insn (operand0, res);
35243 /* Expand SSE sequence for computing round from OPERAND1, storing
35244 into OPERAND0. This sequence works without relying on the DImode
35245 truncation via cvttsd2siq that is only available on 64-bit targets. */
35247 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
35249 /* C code for the stuff we expand below.
35250 double xa = fabs (x), xa2, x2;
35251 if (!isless (xa, TWO52))
35253 Using the absolute value and copying back sign makes
35254 -0.0 -> -0.0 correct.
35255 xa2 = xa + TWO52 - TWO52;
35260 else if (dxa > 0.5)
35262 x2 = copysign (xa2, x);
35265 enum machine_mode mode = GET_MODE (operand0);
35266 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
35268 TWO52 = ix86_gen_TWO52 (mode);
35270 /* Temporary for holding the result, initialized to the input
35271 operand to ease control flow. */
35272 res = gen_reg_rtx (mode);
35273 emit_move_insn (res, operand1);
35275 /* xa = abs (operand1) */
35276 xa = ix86_expand_sse_fabs (res, &mask);
35278 /* if (!isless (xa, TWO52)) goto label; */
35279 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35281 /* xa2 = xa + TWO52 - TWO52; */
35282 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35283 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
35285 /* dxa = xa2 - xa; */
35286 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
35288 /* generate 0.5, 1.0 and -0.5 */
35289 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
35290 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
35291 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
35295 tmp = gen_reg_rtx (mode);
35296 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
35297 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
35298 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35299 gen_rtx_AND (mode, one, tmp)));
35300 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35301 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
35302 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
35303 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35304 gen_rtx_AND (mode, one, tmp)));
35305 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35307 /* res = copysign (xa2, operand1) */
35308 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
35310 emit_label (label);
35311 LABEL_NUSES (label) = 1;
35313 emit_move_insn (operand0, res);
35316 /* Expand SSE sequence for computing trunc from OPERAND1, storing into OPERAND0. */
35319 ix86_expand_trunc (rtx operand0, rtx operand1)
35321 /* C code for SSE variant we expand below.
35322 double xa = fabs (x), x2;
35323 if (!isless (xa, TWO52))
35325 x2 = (double)(long)x;
35326 if (HONOR_SIGNED_ZEROS (mode))
35327 return copysign (x2, x);
35330 enum machine_mode mode = GET_MODE (operand0);
35331 rtx xa, xi, TWO52, label, res, mask;
35333 TWO52 = ix86_gen_TWO52 (mode);
35335 /* Temporary for holding the result, initialized to the input
35336 operand to ease control flow. */
35337 res = gen_reg_rtx (mode);
35338 emit_move_insn (res, operand1);
35340 /* xa = abs (operand1) */
35341 xa = ix86_expand_sse_fabs (res, &mask);
35343 /* if (!isless (xa, TWO52)) goto label; */
35344 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35346 /* x = (double)(long)x */
35347 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35348 expand_fix (xi, res, 0);
35349 expand_float (res, xi, 0);
35351 if (HONOR_SIGNED_ZEROS (mode))
35352 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
35354 emit_label (label);
35355 LABEL_NUSES (label) = 1;
35357 emit_move_insn (operand0, res);
35360 /* Expand SSE sequence for computing trunc from OPERAND1, storing into OPERAND0, without relying on the DImode truncation via cvttsd2siq that is only available on 64-bit targets. */
35363 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
35365 enum machine_mode mode = GET_MODE (operand0);
35366 rtx xa, mask, TWO52, label, one, res, smask, tmp;
35368 /* C code for SSE variant we expand below.
35369 double xa = fabs (x), x2;
35370 if (!isless (xa, TWO52))
35372 xa2 = xa + TWO52 - TWO52;
35376 x2 = copysign (xa2, x);
35380 TWO52 = ix86_gen_TWO52 (mode);
35382 /* Temporary for holding the result, initialized to the input
35383 operand to ease control flow. */
35384 res = gen_reg_rtx (mode);
35385 emit_move_insn (res, operand1);
35387 /* xa = abs (operand1) */
35388 xa = ix86_expand_sse_fabs (res, &smask);
35390 /* if (!isless (xa, TWO52)) goto label; */
35391 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35393 /* res = xa + TWO52 - TWO52; */
35394 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35395 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
35396 emit_move_insn (res, tmp);
35399 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
35401 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
35402 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
35403 emit_insn (gen_rtx_SET (VOIDmode, mask,
35404 gen_rtx_AND (mode, mask, one)));
35405 tmp = expand_simple_binop (mode, MINUS,
35406 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
35407 emit_move_insn (res, tmp);
35409 /* res = copysign (res, operand1) */
35410 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
35412 emit_label (label);
35413 LABEL_NUSES (label) = 1;
35415 emit_move_insn (operand0, res);
35418 /* Expand SSE sequence for computing round from OPERAND1, storing into OPERAND0. */
35421 ix86_expand_round (rtx operand0, rtx operand1)
35423 /* C code for the stuff we're doing below:
35424 double xa = fabs (x);
35425 if (!isless (xa, TWO52))
35427 xa = (double)(long)(xa + nextafter (0.5, 0.0));
35428 return copysign (xa, x);
35430 enum machine_mode mode = GET_MODE (operand0);
35431 rtx res, TWO52, xa, label, xi, half, mask;
35432 const struct real_format *fmt;
35433 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35435 /* Temporary for holding the result, initialized to the input
35436 operand to ease control flow. */
35437 res = gen_reg_rtx (mode);
35438 emit_move_insn (res, operand1);
35440 TWO52 = ix86_gen_TWO52 (mode);
35441 xa = ix86_expand_sse_fabs (res, &mask);
35442 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35444 /* load nextafter (0.5, 0.0) */
35445 fmt = REAL_MODE_FORMAT (mode);
35446 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35447 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35449 /* xa = xa + 0.5 */
35450 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
35451 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
35453 /* xa = (double)(int64_t)xa */
35454 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35455 expand_fix (xi, xa, 0);
35456 expand_float (xa, xi, 0);
35458 /* res = copysign (xa, operand1) */
35459 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
35461 emit_label (label);
35462 LABEL_NUSES (label) = 1;
35464 emit_move_insn (operand0, res);
35467 /* Expand SSE sequence for computing round
35468 from OP1 storing into OP0 using the SSE4.1 round insn. */
35470 ix86_expand_round_sse4 (rtx op0, rtx op1)
35472 enum machine_mode mode = GET_MODE (op0);
35473 rtx e1, e2, res, half;
35474 const struct real_format *fmt;
35475 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35476 rtx (*gen_copysign) (rtx, rtx, rtx);
35477 rtx (*gen_round) (rtx, rtx, rtx);
35482 gen_copysign = gen_copysignsf3;
35483 gen_round = gen_sse4_1_roundsf2;
35486 gen_copysign = gen_copysigndf3;
35487 gen_round = gen_sse4_1_rounddf2;
35490 gcc_unreachable ();
35493 /* round (a) = trunc (a + copysign (0.5, a)) */
35495 /* load nextafter (0.5, 0.0) */
35496 fmt = REAL_MODE_FORMAT (mode);
35497 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35498 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35499 half = const_double_from_real_value (pred_half, mode);
35501 /* e1 = copysign (0.5, op1) */
35502 e1 = gen_reg_rtx (mode);
35503 emit_insn (gen_copysign (e1, half, op1));
35505 /* e2 = op1 + e1 */
35506 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
35508 /* res = trunc (e2) */
35509 res = gen_reg_rtx (mode);
35510 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
35512 emit_move_insn (op0, res);
35516 /* Table of valid machine attributes. */
35517 static const struct attribute_spec ix86_attribute_table[] =
35519 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
35520 affects_type_identity } */
35521 /* Stdcall attribute says callee is responsible for popping arguments
35522 if they are not variable. */
35523 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35525 /* Fastcall attribute says callee is responsible for popping arguments
35526 if they are not variable. */
35527 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35529 /* Thiscall attribute says callee is responsible for popping arguments
35530 if they are not variable. */
35531 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35533 /* Cdecl attribute says the callee is a normal C declaration. */
35534 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35536 /* Regparm attribute specifies how many integer arguments are to be
35537 passed in registers. */
35538 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
35540 /* Sseregparm attribute says we are using x86_64 calling conventions
35541 for FP arguments. */
35542 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35544 /* The transactional memory builtins are implicitly regparm or fastcall
35545 depending on the ABI. Override the generic do-nothing attribute that
35546 these builtins were declared with. */
35547 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
35549 /* force_align_arg_pointer says this function realigns the stack at entry. */
35550 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
35551 false, true, true, ix86_handle_cconv_attribute, false },
35552 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35553 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
35554 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
35555 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
35558 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
35560 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
35562 #ifdef SUBTARGET_ATTRIBUTE_TABLE
35563 SUBTARGET_ATTRIBUTE_TABLE,
35565 /* ms_abi and sysv_abi calling convention function attributes. */
35566 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
35567 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
35568 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
35570 { "callee_pop_aggregate_return", 1, 1, false, true, true,
35571 ix86_handle_callee_pop_aggregate_return, true },
35573 { NULL, 0, 0, false, false, false, NULL, false }
35576 /* Implement targetm.vectorize.builtin_vectorization_cost. */
35578 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
35579 tree vectype ATTRIBUTE_UNUSED,
35580 int misalign ATTRIBUTE_UNUSED)
35582 switch (type_of_cost)
35585 return ix86_cost->scalar_stmt_cost;
35588 return ix86_cost->scalar_load_cost;
35591 return ix86_cost->scalar_store_cost;
35594 return ix86_cost->vec_stmt_cost;
35597 return ix86_cost->vec_align_load_cost;
35600 return ix86_cost->vec_store_cost;
35602 case vec_to_scalar:
35603 return ix86_cost->vec_to_scalar_cost;
35605 case scalar_to_vec:
35606 return ix86_cost->scalar_to_vec_cost;
35608 case unaligned_load:
35609 case unaligned_store:
35610 return ix86_cost->vec_unalign_load_cost;
35612 case cond_branch_taken:
35613 return ix86_cost->cond_taken_branch_cost;
35615 case cond_branch_not_taken:
35616 return ix86_cost->cond_not_taken_branch_cost;
35619 case vec_promote_demote:
35620 return ix86_cost->vec_stmt_cost;
35623 gcc_unreachable ();
35627 /* Construct (set target (vec_select op0 (parallel perm))) and
35628 return true if that's a valid instruction in the active ISA. */
35631 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
35633 rtx rperm[MAX_VECT_LEN], x;
35636 for (i = 0; i < nelt; ++i)
35637 rperm[i] = GEN_INT (perm[i]);
35639 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
35640 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
35641 x = gen_rtx_SET (VOIDmode, target, x);
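/* E.g. for a V4SImode target with perm == { 2, 3, 0, 1 } this builds
(set target (vec_select:V4SI op0 (parallel [2 3 0 1]))), which recog
can match against e.g. the sse2_pshufd_1 pattern in sse.md. */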
35644 if (recog_memoized (x) < 0)
35652 /* Similar, but generate a vec_concat from op0 and op1 as well. */
35655 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
35656 const unsigned char *perm, unsigned nelt)
35658 enum machine_mode v2mode;
35661 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
35662 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
35663 return expand_vselect (target, x, perm, nelt);
35666 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35667 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
35670 expand_vec_perm_blend (struct expand_vec_perm_d *d)
35672 enum machine_mode vmode = d->vmode;
35673 unsigned i, mask, nelt = d->nelt;
35674 rtx target, op0, op1, x;
35675 rtx rperm[32], vperm;
35677 if (d->op0 == d->op1)
35679 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
35681 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
35683 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
35688 /* This is a blend, not a permute. Elements must stay in their
35689 respective lanes. */
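/* E.g. for V8HImode, perm == { 0, 9, 2, 11, 4, 13, 6, 15 } is a valid
blend mask (element I is either I or I + 8), whereas
{ 1, 9, 2, 11, 4, 13, 6, 15 } would need a real permute. */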
35690 for (i = 0; i < nelt; ++i)
35692 unsigned e = d->perm[i];
35693 if (!(e == i || e == i + nelt))
35700 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
35701 decision should be extracted elsewhere, so that we only try that
35702 sequence once all budget==3 options have been tried. */
35703 target = d->target;
35716 for (i = 0; i < nelt; ++i)
35717 mask |= (d->perm[i] >= nelt) << i;
35721 for (i = 0; i < 2; ++i)
35722 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
35727 for (i = 0; i < 4; ++i)
35728 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
35733 /* See if bytes move in pairs so we can use pblendw with
35734 an immediate argument, rather than pblendvb with a vector
35736 for (i = 0; i < 16; i += 2)
35737 if (d->perm[i] + 1 != d->perm[i + 1])
35740 for (i = 0; i < nelt; ++i)
35741 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
35744 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
35745 vperm = force_reg (vmode, vperm);
35747 if (GET_MODE_SIZE (vmode) == 16)
35748 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
35750 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
35754 for (i = 0; i < 8; ++i)
35755 mask |= (d->perm[i * 2] >= 16) << i;
35760 target = gen_lowpart (vmode, target);
35761 op0 = gen_lowpart (vmode, op0);
35762 op1 = gen_lowpart (vmode, op1);
35766 /* See if bytes move in pairs. If not, vpblendvb must be used. */
35767 for (i = 0; i < 32; i += 2)
35768 if (d->perm[i] + 1 != d->perm[i + 1])
35770 /* See if bytes move in quadruplets. If yes, vpblendd
35771 with immediate can be used. */
35772 for (i = 0; i < 32; i += 4)
35773 if (d->perm[i] + 2 != d->perm[i + 2])
35777 /* See if bytes move the same in both lanes. If yes,
35778 vpblendw with immediate can be used. */
35779 for (i = 0; i < 16; i += 2)
35780 if (d->perm[i] + 16 != d->perm[i + 16])
35783 /* Use vpblendw. */
35784 for (i = 0; i < 16; ++i)
35785 mask |= (d->perm[i * 2] >= 32) << i;
35790 /* Use vpblendd. */
35791 for (i = 0; i < 8; ++i)
35792 mask |= (d->perm[i * 4] >= 32) << i;
35797 /* See if words move in pairs. If yes, vpblendd can be used. */
35798 for (i = 0; i < 16; i += 2)
35799 if (d->perm[i] + 1 != d->perm[i + 1])
35803 /* See if words move the same in both lanes. If not,
35804 vpblendvb must be used. */
35805 for (i = 0; i < 8; i++)
35806 if (d->perm[i] + 8 != d->perm[i + 8])
35808 /* Use vpblendvb. */
35809 for (i = 0; i < 32; ++i)
35810 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
35814 target = gen_lowpart (vmode, target);
35815 op0 = gen_lowpart (vmode, op0);
35816 op1 = gen_lowpart (vmode, op1);
35817 goto finish_pblendvb;
35820 /* Use vpblendw. */
35821 for (i = 0; i < 16; ++i)
35822 mask |= (d->perm[i] >= 16) << i;
35826 /* Use vpblendd. */
35827 for (i = 0; i < 8; ++i)
35828 mask |= (d->perm[i * 2] >= 16) << i;
35833 /* Use vpblendd. */
35834 for (i = 0; i < 4; ++i)
35835 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
35840 gcc_unreachable ();
35843 /* This matches five different patterns with the different modes. */
35844 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
35845 x = gen_rtx_SET (VOIDmode, target, x);
35851 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35852 in terms of the variable form of vpermilps.
35854 Note that we will have already failed the immediate input vpermilps,
35855 which requires that the high and low part shuffle be identical; the
35856 variable form doesn't require that. */
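/* E.g. the immediate form can encode { 1, 0, 3, 2, 5, 4, 7, 6 } (the
same 2-bit selectors in both 128-bit lanes) but not
{ 1, 0, 3, 2, 4, 5, 6, 7 }; the variable form handles the latter by
taking the per-element selectors from a vector operand. */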
35859 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
35861 rtx rperm[8], vperm;
35864 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
35867 /* We can only permute within the 128-bit lane. */
35868 for (i = 0; i < 8; ++i)
35870 unsigned e = d->perm[i];
35871 if (i < 4 ? e >= 4 : e < 4)
35878 for (i = 0; i < 8; ++i)
35880 unsigned e = d->perm[i];
35882 /* Within each 128-bit lane, the elements of op0 are numbered
35883 from 0 and the elements of op1 are numbered from 4. */
35889 rperm[i] = GEN_INT (e);
35892 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
35893 vperm = force_reg (V8SImode, vperm);
35894 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
35899 /* Return true if permutation D can be performed as VMODE permutation
35903 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
35905 unsigned int i, j, chunk;
35907 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
35908 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
35909 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
35912 if (GET_MODE_NUNITS (vmode) >= d->nelt)
35915 chunk = d->nelt / GET_MODE_NUNITS (vmode);
35916 for (i = 0; i < d->nelt; i += chunk)
35917 if (d->perm[i] & (chunk - 1))
35920 for (j = 1; j < chunk; ++j)
35921 if (d->perm[i] + j != d->perm[i + j])
35927 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35928 in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128. */
35931 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
35933 unsigned i, nelt, eltsz, mask;
35934 unsigned char perm[32];
35935 enum machine_mode vmode = V16QImode;
35936 rtx rperm[32], vperm, target, op0, op1;
35940 if (d->op0 != d->op1)
35942 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
35945 && valid_perm_using_mode_p (V2TImode, d))
35950 /* Use vperm2i128 insn. The pattern uses
35951 V4DImode instead of V2TImode. */
35952 target = gen_lowpart (V4DImode, d->target);
35953 op0 = gen_lowpart (V4DImode, d->op0);
35954 op1 = gen_lowpart (V4DImode, d->op1);
35956 = GEN_INT ((d->perm[0] / (nelt / 2))
35957 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
35958 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
35966 if (GET_MODE_SIZE (d->vmode) == 16)
35971 else if (GET_MODE_SIZE (d->vmode) == 32)
35976 /* V4DImode should be already handled through
35977 expand_vselect by vpermq instruction. */
35978 gcc_assert (d->vmode != V4DImode);
35981 if (d->vmode == V8SImode
35982 || d->vmode == V16HImode
35983 || d->vmode == V32QImode)
35985 /* First see if vpermq can be used for
35986 V8SImode/V16HImode/V32QImode. */
35987 if (valid_perm_using_mode_p (V4DImode, d))
35989 for (i = 0; i < 4; i++)
35990 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
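/* Each 64-bit chunk collapses to one V4DImode selector: e.g. a
V8SImode permutation { 2, 3, 0, 1, 6, 7, 4, 5 } becomes the vpermq
selector { 1, 0, 3, 2 }. */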
35993 return expand_vselect (gen_lowpart (V4DImode, d->target),
35994 gen_lowpart (V4DImode, d->op0),
35998 /* Next see if vpermd can be used. */
35999 if (valid_perm_using_mode_p (V8SImode, d))
36003 if (vmode == V32QImode)
36005 /* vpshufb only works intra-lane; it is not
36006 possible to shuffle bytes between the lanes. */
36007 for (i = 0; i < nelt; ++i)
36008 if ((d->perm[i] ^ i) & (nelt / 2))
36019 if (vmode == V8SImode)
36020 for (i = 0; i < 8; ++i)
36021 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
36024 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36025 if (d->op0 != d->op1)
36026 mask = 2 * nelt - 1;
36027 else if (vmode == V16QImode)
36030 mask = nelt / 2 - 1;
36032 for (i = 0; i < nelt; ++i)
36034 unsigned j, e = d->perm[i] & mask;
36035 for (j = 0; j < eltsz; ++j)
36036 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
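/* E.g. for V8HImode (eltsz == 2) an element selector E expands to the
byte selectors { 2*E, 2*E + 1 }, so perm == { 1, 0, ... } yields the
pshufb control bytes { 2, 3, 0, 1, ... }. */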
36040 vperm = gen_rtx_CONST_VECTOR (vmode,
36041 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
36042 vperm = force_reg (vmode, vperm);
36044 target = gen_lowpart (vmode, d->target);
36045 op0 = gen_lowpart (vmode, d->op0);
36046 if (d->op0 == d->op1)
36048 if (vmode == V16QImode)
36049 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
36050 else if (vmode == V32QImode)
36051 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
36053 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
36057 op1 = gen_lowpart (vmode, d->op1);
36058 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
36064 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
36065 in a single instruction. */
36068 expand_vec_perm_1 (struct expand_vec_perm_d *d)
36070 unsigned i, nelt = d->nelt;
36071 unsigned char perm2[MAX_VECT_LEN];
36073 /* Check plain VEC_SELECT first, because AVX has instructions that could
36074 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
36075 input where SEL+CONCAT may not. */
36076 if (d->op0 == d->op1)
36078 int mask = nelt - 1;
36079 bool identity_perm = true;
36080 bool broadcast_perm = true;
36082 for (i = 0; i < nelt; i++)
36084 perm2[i] = d->perm[i] & mask;
36086 identity_perm = false;
36088 broadcast_perm = false;
36094 emit_move_insn (d->target, d->op0);
36097 else if (broadcast_perm && TARGET_AVX2)
36099 /* Use vpbroadcast{b,w,d}. */
36100 rtx op = d->op0, (*gen) (rtx, rtx) = NULL;
36104 op = gen_lowpart (V16QImode, op);
36105 gen = gen_avx2_pbroadcastv32qi;
36108 op = gen_lowpart (V8HImode, op);
36109 gen = gen_avx2_pbroadcastv16hi;
36112 op = gen_lowpart (V4SImode, op);
36113 gen = gen_avx2_pbroadcastv8si;
36116 gen = gen_avx2_pbroadcastv16qi;
36119 gen = gen_avx2_pbroadcastv8hi;
36121 /* For other modes, prefer the other shuffles this function creates. */
36127 emit_insn (gen (d->target, op));
36132 if (expand_vselect (d->target, d->op0, perm2, nelt))
36135 /* There are plenty of patterns in sse.md that are written for
36136 SEL+CONCAT and are not replicated for a single op. Perhaps
36137 that should be changed, to avoid the nastiness here. */
36139 /* Recognize interleave style patterns, which means incrementing
36140 every other permutation operand. */
36141 for (i = 0; i < nelt; i += 2)
36143 perm2[i] = d->perm[i] & mask;
36144 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
36146 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
36149 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
36152 for (i = 0; i < nelt; i += 4)
36154 perm2[i + 0] = d->perm[i + 0] & mask;
36155 perm2[i + 1] = d->perm[i + 1] & mask;
36156 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
36157 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
36160 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
36165 /* Finally, try the fully general two operand permute. */
36166 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
36169 /* Recognize interleave style patterns with reversed operands. */
36170 if (d->op0 != d->op1)
36172 for (i = 0; i < nelt; ++i)
36174 unsigned e = d->perm[i];
36182 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
36186 /* Try the SSE4.1 blend variable merge instructions. */
36187 if (expand_vec_perm_blend (d))
36190 /* Try one of the AVX vpermil variable permutations. */
36191 if (expand_vec_perm_vpermil (d))
36194 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
36195 vpshufb, vpermd or vpermq variable permutation. */
36196 if (expand_vec_perm_pshufb (d))
36202 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36203 in terms of a pair of pshuflw + pshufhw instructions. */
36206 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
36208 unsigned char perm2[MAX_VECT_LEN];
36212 if (d->vmode != V8HImode || d->op0 != d->op1)
36215 /* The two permutations only operate in 64-bit lanes. */
36216 for (i = 0; i < 4; ++i)
36217 if (d->perm[i] >= 4)
36219 for (i = 4; i < 8; ++i)
36220 if (d->perm[i] < 4)
36226 /* Emit the pshuflw. */
36227 memcpy (perm2, d->perm, 4);
36228 for (i = 4; i < 8; ++i)
36230 ok = expand_vselect (d->target, d->op0, perm2, 8);
36233 /* Emit the pshufhw. */
36234 memcpy (perm2 + 4, d->perm + 4, 4);
36235 for (i = 0; i < 4; ++i)
36237 ok = expand_vselect (d->target, d->target, perm2, 8);
36243 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36244 the permutation using the SSSE3 palignr instruction. This succeeds
36245 when all of the elements in PERM fit within one vector and we merely
36246 need to shift them down so that a single vector permutation has a
36247 chance to succeed. */
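/* E.g. for V16QImode with perm == { 3, 4, ..., 18 } every selector
falls within a 16-byte window starting at element 3, so a palignr by
3 bytes reduces the job to the identity permutation
{ 0, 1, ..., 15 }. */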
36250 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
36252 unsigned i, nelt = d->nelt;
36257 /* Even with AVX, palignr only operates on 128-bit vectors. */
36258 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
36261 min = nelt, max = 0;
36262 for (i = 0; i < nelt; ++i)
36264 unsigned e = d->perm[i];
36270 if (min == 0 || max - min >= nelt)
36273 /* Given that we have SSSE3, we know we'll be able to implement the
36274 single operand permutation after the palignr with pshufb. */
36278 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
36279 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
36280 gen_lowpart (TImode, d->op1),
36281 gen_lowpart (TImode, d->op0), shift));
36283 d->op0 = d->op1 = d->target;
36286 for (i = 0; i < nelt; ++i)
36288 unsigned e = d->perm[i] - min;
36294 /* Test for the degenerate case where the alignment by itself
36295 produces the desired permutation. */
36299 ok = expand_vec_perm_1 (d);
36305 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
36307 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36308 a two vector permutation into a single vector permutation by using
36309 an interleave operation to merge the vectors. */
36312 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
36314 struct expand_vec_perm_d dremap, dfinal;
36315 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
36316 unsigned HOST_WIDE_INT contents;
36317 unsigned char remap[2 * MAX_VECT_LEN];
36319 bool ok, same_halves = false;
36321 if (GET_MODE_SIZE (d->vmode) == 16)
36323 if (d->op0 == d->op1)
36326 else if (GET_MODE_SIZE (d->vmode) == 32)
36330 /* For 32-byte modes allow even d->op0 == d->op1.
36331 The lack of cross-lane shuffling in some instructions
36332 might prevent a single insn shuffle. */
36334 dfinal.testing_p = true;
36335 /* If expand_vec_perm_interleave3 can expand this into
36336 a 3 insn sequence, give up and let it be expanded as
36337 a 3 insn sequence. While that is one insn longer,
36338 it doesn't need a memory operand, and in the common
36339 case that the interleave low and interleave high
36340 permutations with the same operands are adjacent, the
36341 pair needs only 4 insns for both after CSE. */
36342 if (expand_vec_perm_interleave3 (&dfinal))
36348 /* Examine from whence the elements come. */
36350 for (i = 0; i < nelt; ++i)
36351 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
36353 memset (remap, 0xff, sizeof (remap));
36356 if (GET_MODE_SIZE (d->vmode) == 16)
36358 unsigned HOST_WIDE_INT h1, h2, h3, h4;
36360 /* Split the two input vectors into 4 halves. */
36361 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
36366 /* If the elements are from the low halves, use interleave low; similarly,
36367 use interleave high for the high halves. If the elements are from
36368 mismatched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
36369 if ((contents & (h1 | h3)) == contents)
36372 for (i = 0; i < nelt2; ++i)
36375 remap[i + nelt] = i * 2 + 1;
36376 dremap.perm[i * 2] = i;
36377 dremap.perm[i * 2 + 1] = i + nelt;
36379 if (!TARGET_SSE2 && d->vmode == V4SImode)
36380 dremap.vmode = V4SFmode;
36382 else if ((contents & (h2 | h4)) == contents)
36385 for (i = 0; i < nelt2; ++i)
36387 remap[i + nelt2] = i * 2;
36388 remap[i + nelt + nelt2] = i * 2 + 1;
36389 dremap.perm[i * 2] = i + nelt2;
36390 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
36392 if (!TARGET_SSE2 && d->vmode == V4SImode)
36393 dremap.vmode = V4SFmode;
36395 else if ((contents & (h1 | h4)) == contents)
36398 for (i = 0; i < nelt2; ++i)
36401 remap[i + nelt + nelt2] = i + nelt2;
36402 dremap.perm[i] = i;
36403 dremap.perm[i + nelt2] = i + nelt + nelt2;
36408 dremap.vmode = V2DImode;
36410 dremap.perm[0] = 0;
36411 dremap.perm[1] = 3;
36414 else if ((contents & (h2 | h3)) == contents)
36417 for (i = 0; i < nelt2; ++i)
36419 remap[i + nelt2] = i;
36420 remap[i + nelt] = i + nelt2;
36421 dremap.perm[i] = i + nelt2;
36422 dremap.perm[i + nelt2] = i + nelt;
36427 dremap.vmode = V2DImode;
36429 dremap.perm[0] = 1;
36430 dremap.perm[1] = 2;
36438 unsigned int nelt4 = nelt / 4, nzcnt = 0;
36439 unsigned HOST_WIDE_INT q[8];
36440 unsigned int nonzero_halves[4];
36442 /* Split the two input vectors into 8 quarters. */
36443 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
36444 for (i = 1; i < 8; ++i)
36445 q[i] = q[0] << (nelt4 * i);
36446 for (i = 0; i < 4; ++i)
36447 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
36449 nonzero_halves[nzcnt] = i;
36455 gcc_assert (d->op0 == d->op1);
36456 nonzero_halves[1] = nonzero_halves[0];
36457 same_halves = true;
36459 else if (d->op0 == d->op1)
36461 gcc_assert (nonzero_halves[0] == 0);
36462 gcc_assert (nonzero_halves[1] == 1);
36467 if (d->perm[0] / nelt2 == nonzero_halves[1])
36469 /* Attempt to increase the likelihood that the dfinal
36470 shuffle will be intra-lane. */
36471 char tmph = nonzero_halves[0];
36472 nonzero_halves[0] = nonzero_halves[1];
36473 nonzero_halves[1] = tmph;
36476 /* vperm2f128 or vperm2i128. */
36477 for (i = 0; i < nelt2; ++i)
36479 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
36480 remap[i + nonzero_halves[0] * nelt2] = i;
36481 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
36482 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
36485 if (d->vmode != V8SFmode
36486 && d->vmode != V4DFmode
36487 && d->vmode != V8SImode)
36489 dremap.vmode = V8SImode;
36491 for (i = 0; i < 4; ++i)
36493 dremap.perm[i] = i + nonzero_halves[0] * 4;
36494 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
36498 else if (d->op0 == d->op1)
36500 else if (TARGET_AVX2
36501 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
36504 for (i = 0; i < nelt4; ++i)
36507 remap[i + nelt] = i * 2 + 1;
36508 remap[i + nelt2] = i * 2 + nelt2;
36509 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
36510 dremap.perm[i * 2] = i;
36511 dremap.perm[i * 2 + 1] = i + nelt;
36512 dremap.perm[i * 2 + nelt2] = i + nelt2;
36513 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
36516 else if (TARGET_AVX2
36517 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
36520 for (i = 0; i < nelt4; ++i)
36522 remap[i + nelt4] = i * 2;
36523 remap[i + nelt + nelt4] = i * 2 + 1;
36524 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
36525 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
36526 dremap.perm[i * 2] = i + nelt4;
36527 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
36528 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
36529 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
36536 /* Use the remapping array set up above to move the elements from their
36537 swizzled locations into their final destinations. */
36539 for (i = 0; i < nelt; ++i)
36541 unsigned e = remap[d->perm[i]];
36542 gcc_assert (e < nelt);
36543 /* If same_halves is true, both halves of the remapped vector are the
36544 same. Avoid cross-lane accesses if possible. */
36545 if (same_halves && i >= nelt2)
36547 gcc_assert (e < nelt2);
36548 dfinal.perm[i] = e + nelt2;
36551 dfinal.perm[i] = e;
36553 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
36554 dfinal.op1 = dfinal.op0;
36555 dremap.target = dfinal.op0;
36557 /* Test if the final remap can be done with a single insn. For V4SFmode or
36558 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
36560 ok = expand_vec_perm_1 (&dfinal);
36561 seq = get_insns ();
36570 if (dremap.vmode != dfinal.vmode)
36572 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
36573 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
36574 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
36577 ok = expand_vec_perm_1 (&dremap);
36584 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36585 a single vector cross-lane permutation into vpermq followed
36586 by any of the single insn permutations. */
36589 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
36591 struct expand_vec_perm_d dremap, dfinal;
36592 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
36593 unsigned contents[2];
36597 && (d->vmode == V32QImode || d->vmode == V16HImode)
36598 && d->op0 == d->op1))
36603 for (i = 0; i < nelt2; ++i)
36605 contents[0] |= 1u << (d->perm[i] / nelt4);
36606 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
36609 for (i = 0; i < 2; ++i)
36611 unsigned int cnt = 0;
36612 for (j = 0; j < 4; ++j)
36613 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
36621 dremap.vmode = V4DImode;
36623 dremap.target = gen_reg_rtx (V4DImode);
36624 dremap.op0 = gen_lowpart (V4DImode, d->op0);
36625 dremap.op1 = dremap.op0;
36626 for (i = 0; i < 2; ++i)
36628 unsigned int cnt = 0;
36629 for (j = 0; j < 4; ++j)
36630 if ((contents[i] & (1u << j)) != 0)
36631 dremap.perm[2 * i + cnt++] = j;
36632 for (; cnt < 2; ++cnt)
36633 dremap.perm[2 * i + cnt] = 0;
36637 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
36638 dfinal.op1 = dfinal.op0;
36639 for (i = 0, j = 0; i < nelt; ++i)
36643 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
36644 if ((d->perm[i] / nelt4) == dremap.perm[j])
36646 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
36647 dfinal.perm[i] |= nelt4;
36649 gcc_unreachable ();
36652 ok = expand_vec_perm_1 (&dremap);
36655 ok = expand_vec_perm_1 (&dfinal);
36661 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36662 a two vector permutation using 2 intra-lane interleave insns
36663 and cross-lane shuffle for 32-byte vectors. */
36666 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
36669 rtx (*gen) (rtx, rtx, rtx);
36671 if (d->op0 == d->op1)
36673 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
36675 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
36681 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
36683 for (i = 0; i < nelt; i += 2)
36684 if (d->perm[i] != d->perm[0] + i / 2
36685 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
36695 gen = gen_vec_interleave_highv32qi;
36697 gen = gen_vec_interleave_lowv32qi;
36701 gen = gen_vec_interleave_highv16hi;
36703 gen = gen_vec_interleave_lowv16hi;
36707 gen = gen_vec_interleave_highv8si;
36709 gen = gen_vec_interleave_lowv8si;
36713 gen = gen_vec_interleave_highv4di;
36715 gen = gen_vec_interleave_lowv4di;
36719 gen = gen_vec_interleave_highv8sf;
36721 gen = gen_vec_interleave_lowv8sf;
36725 gen = gen_vec_interleave_highv4df;
36727 gen = gen_vec_interleave_lowv4df;
36730 gcc_unreachable ();
36733 emit_insn (gen (d->target, d->op0, d->op1));
36737 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
36738 permutation with two pshufb insns and an ior. We should have already
36739 failed all two instruction sequences. */
36742 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
36744 rtx rperm[2][16], vperm, l, h, op, m128;
36745 unsigned int i, nelt, eltsz;
36747 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
36749 gcc_assert (d->op0 != d->op1);
36752 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36754 /* Generate two permutation masks. If the required element is within
36755 the given vector it is shuffled into the proper lane. If the required
36756 element is in the other vector, force a zero into the lane by setting
36757 bit 7 in the permutation mask. */
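/* E.g. for the V16QImode extract-even permutation
{ 0, 2, ..., 14, 16, 18, ..., 30 } the first mask becomes
{ 0, 2, ..., 14, -128, ..., -128 } and the second
{ -128, ..., -128, 0, 2, ..., 14 }; the two pshufb results are then
combined with por below. */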
36758 m128 = GEN_INT (-128);
36759 for (i = 0; i < nelt; ++i)
36761 unsigned j, e = d->perm[i];
36762 unsigned which = (e >= nelt);
36766 for (j = 0; j < eltsz; ++j)
36768 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
36769 rperm[1-which][i*eltsz + j] = m128;
36773 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
36774 vperm = force_reg (V16QImode, vperm);
36776 l = gen_reg_rtx (V16QImode);
36777 op = gen_lowpart (V16QImode, d->op0);
36778 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
36780 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
36781 vperm = force_reg (V16QImode, vperm);
36783 h = gen_reg_rtx (V16QImode);
36784 op = gen_lowpart (V16QImode, d->op1);
36785 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
36787 op = gen_lowpart (V16QImode, d->target);
36788 emit_insn (gen_iorv16qi3 (op, l, h));
36793 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
36794 with two vpshufb insns, vpermq and vpor. We should have already failed
36795 all two or three instruction sequences. */
36798 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
36800 rtx rperm[2][32], vperm, l, h, hp, op, m128;
36801 unsigned int i, nelt, eltsz;
36804 || d->op0 != d->op1
36805 || (d->vmode != V32QImode && d->vmode != V16HImode))
36812 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36814 /* Generate two permutation masks. If the required element is within
36815 the same lane, it is shuffled in. If the required element is from the
36816 other lane, force a zero by setting bit 7 in the permutation mask.
36817 The other mask has a non-negative element wherever an element is
36818 requested from the other lane, but stored at the lane-swapped position,
36819 so that the result of vpshufb can have the two V2TImode halves
36820 swapped. */
36821 m128 = GEN_INT (-128);
36822 for (i = 0; i < nelt; ++i)
36824 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
36825 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
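/* WHICH is 16 when element I wants its data from the other 128-bit
lane and 0 otherwise; XORing the byte index with it stores the
selector in the opposite half of the mask. */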
36827 for (j = 0; j < eltsz; ++j)
36829 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
36830 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
36834 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
36835 vperm = force_reg (V32QImode, vperm);
36837 h = gen_reg_rtx (V32QImode);
36838 op = gen_lowpart (V32QImode, d->op0);
36839 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
36841 /* Swap the 128-bit lanes of h into hp. */
36842 hp = gen_reg_rtx (V4DImode);
36843 op = gen_lowpart (V4DImode, h);
36844 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
36847 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
36848 vperm = force_reg (V32QImode, vperm);
36850 l = gen_reg_rtx (V32QImode);
36851 op = gen_lowpart (V32QImode, d->op0);
36852 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
36854 op = gen_lowpart (V32QImode, d->target);
36855 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
36860 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
36861 and extract-odd permutations of two V32QImode or V16HImode operands
36862 with two vpshufb insns, vpor and vpermq. We should have already
36863 failed all two or three instruction sequences. */
36866 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
36868 rtx rperm[2][32], vperm, l, h, ior, op, m128;
36869 unsigned int i, nelt, eltsz;
36872 || d->op0 == d->op1
36873 || (d->vmode != V32QImode && d->vmode != V16HImode))
36876 for (i = 0; i < d->nelt; ++i)
36877 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
36884 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36886 /* Generate two permutation masks. In the first permutation mask
36887 the first quarter will contain indexes for the first half
36888 of the op0, the second quarter will contain bit 7 set, third quarter
36889 will contain indexes for the second half of the op0 and the
36890 last quarter bit 7 set. In the second permutation mask
36891 the first quarter will contain bit 7 set, the second quarter
36892 indexes for the first half of the op1, the third quarter bit 7 set
36893 and last quarter indexes for the second half of the op1.
36894 I.e. the first mask e.g. for V32QImode extract even will be:
36895 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
36896 (all values masked with 0xf except for -128) and second mask
36897 for extract even will be
36898 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
36899 m128 = GEN_INT (-128);
36900 for (i = 0; i < nelt; ++i)
36902 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
36903 unsigned which = d->perm[i] >= nelt;
36904 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
36906 for (j = 0; j < eltsz; ++j)
36908 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
36909 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
36913 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
36914 vperm = force_reg (V32QImode, vperm);
36916 l = gen_reg_rtx (V32QImode);
36917 op = gen_lowpart (V32QImode, d->op0);
36918 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
36920 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
36921 vperm = force_reg (V32QImode, vperm);
36923 h = gen_reg_rtx (V32QImode);
36924 op = gen_lowpart (V32QImode, d->op1);
36925 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
36927 ior = gen_reg_rtx (V32QImode);
36928 emit_insn (gen_iorv32qi3 (ior, l, h));
36930 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
36931 op = gen_lowpart (V4DImode, d->target);
36932 ior = gen_lowpart (V4DImode, ior);
36933 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
36934 const1_rtx, GEN_INT (3)));
36939 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
36940 and extract-odd permutations. */
36943 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
36950 t1 = gen_reg_rtx (V4DFmode);
36951 t2 = gen_reg_rtx (V4DFmode);
36953 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
36954 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
36955 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
36957 /* Now an unpck[lh]pd will produce the result required. */
36959 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
36961 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
36967 int mask = odd ? 0xdd : 0x88;
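/* Within each 128-bit lane, shufps mask 0x88 picks elements { 0, 2 }
of both sources (the even elements), while 0xdd picks { 1, 3 } (the
odd ones). */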
36969 t1 = gen_reg_rtx (V8SFmode);
36970 t2 = gen_reg_rtx (V8SFmode);
36971 t3 = gen_reg_rtx (V8SFmode);
36973 /* Shuffle within the 128-bit lanes to produce:
36974 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
36975 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
36978 /* Shuffle the lanes around to produce:
36979 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
36980 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
36983 /* Shuffle within the 128-bit lanes to produce:
36984 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
36985 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
36987 /* Shuffle within the 128-bit lanes to produce:
36988 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
36989 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
36991 /* Shuffle the lanes around to produce:
36992 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
36993 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
37002 /* These are always directly implementable by expand_vec_perm_1. */
37003 gcc_unreachable ();
37007 return expand_vec_perm_pshufb2 (d);
37010 /* We need 2*log2(N)-1 operations to achieve odd/even
37011 with interleave. */
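/* For V8HImode (N == 8) that is 2*3 - 1 == 5 interleave insns,
emitted below; after the third round the even elements end up
contiguous in the interleave-low result and the odd elements in the
interleave-high result. */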
37012 t1 = gen_reg_rtx (V8HImode);
37013 t2 = gen_reg_rtx (V8HImode);
37014 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
37015 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
37016 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
37017 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
37019 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
37021 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
37028 return expand_vec_perm_pshufb2 (d);
37031 t1 = gen_reg_rtx (V16QImode);
37032 t2 = gen_reg_rtx (V16QImode);
37033 t3 = gen_reg_rtx (V16QImode);
37034 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
37035 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
37036 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
37037 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
37038 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
37039 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
37041 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
37043 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
37050 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
37055 struct expand_vec_perm_d d_copy = *d;
37056 d_copy.vmode = V4DFmode;
37057 d_copy.target = gen_lowpart (V4DFmode, d->target);
37058 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
37059 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
37060 return expand_vec_perm_even_odd_1 (&d_copy, odd);
37063 t1 = gen_reg_rtx (V4DImode);
37064 t2 = gen_reg_rtx (V4DImode);
37066 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
37067 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
37068 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
37070 /* Now a vpunpck[lh]qdq will produce the result required. */
37072 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
37074 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
37081 struct expand_vec_perm_d d_copy = *d;
37082 d_copy.vmode = V8SFmode;
37083 d_copy.target = gen_lowpart (V8SFmode, d->target);
37084 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
37085 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
37086 return expand_vec_perm_even_odd_1 (&d_copy, odd);
37089 t1 = gen_reg_rtx (V8SImode);
37090 t2 = gen_reg_rtx (V8SImode);
37092 /* Shuffle the lanes around into
37093 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
37094 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
37095 gen_lowpart (V4DImode, d->op0),
37096 gen_lowpart (V4DImode, d->op1),
37098 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
37099 gen_lowpart (V4DImode, d->op0),
37100 gen_lowpart (V4DImode, d->op1),
37103 /* Swap the 2nd and 3rd position in each lane into
37104 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
37105 emit_insn (gen_avx2_pshufdv3 (t1, t1,
37106 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37107 emit_insn (gen_avx2_pshufdv3 (t2, t2,
37108 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37110 /* Now a vpunpck[lh]qdq will produce
37111 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
37113 t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
37114 gen_lowpart (V4DImode, t1),
37115 gen_lowpart (V4DImode, t2));
37117 t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
37118 gen_lowpart (V4DImode, t1),
37119 gen_lowpart (V4DImode, t2));
37124 gcc_unreachable ();
37130 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37131 extract-even and extract-odd permutations. */
37134 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
37136 unsigned i, odd, nelt = d->nelt;
37139 if (odd != 0 && odd != 1)
37142 for (i = 1; i < nelt; ++i)
37143 if (d->perm[i] != 2 * i + odd)
37146 return expand_vec_perm_even_odd_1 (d, odd);
37149 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
37150 permutations. We assume that expand_vec_perm_1 has already failed. */
37153 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
37155 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
37156 enum machine_mode vmode = d->vmode;
37157 unsigned char perm2[4];
37165 /* These are special-cased in sse.md so that we can optionally
37166 use the vbroadcast instruction. They expand to two insns
37167 if the input happens to be in a register. */
37168 gcc_unreachable ();
37174 /* These are always implementable using standard shuffle patterns. */
37175 gcc_unreachable ();
37179 /* These can be implemented via interleave. We save one insn by
37180 stopping once we have promoted to V4SImode and then use pshufd. */
37184 rtx (*gen) (rtx, rtx, rtx)
37185 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
37186 : gen_vec_interleave_lowv8hi;
37190 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
37191 : gen_vec_interleave_highv8hi;
37196 dest = gen_reg_rtx (vmode);
37197 emit_insn (gen (dest, op0, op0));
37198 vmode = get_mode_wider_vector (vmode);
37199 op0 = gen_lowpart (vmode, dest);
37201 while (vmode != V4SImode);
37203 memset (perm2, elt, 4);
37204 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
37212 /* For AVX2 broadcasts of the first element vpbroadcast* or
37213 vpermq should be used by expand_vec_perm_1. */
37214 gcc_assert (!TARGET_AVX2 || d->perm[0]);
37218 gcc_unreachable ();
37222 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37223 broadcast permutations. */
37226 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
37228 unsigned i, elt, nelt = d->nelt;
37230 if (d->op0 != d->op1)
37234 for (i = 1; i < nelt; ++i)
37235 if (d->perm[i] != elt)
37238 return expand_vec_perm_broadcast_1 (d);
37241 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
37242 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
37243 all the shorter instruction sequences. */
37246 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
37248 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
37249 unsigned int i, nelt, eltsz;
37253 || d->op0 == d->op1
37254 || (d->vmode != V32QImode && d->vmode != V16HImode))
37261 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
37263 /* Generate 4 permutation masks. If the required element is within
37264 the same lane, it is shuffled in. If the required element is from the
37265 other lane, force a zero by setting bit 7 in the permutation mask.
37266 The other mask has a non-negative element wherever an element is
37267 requested from the other lane, but stored at the lane-swapped position,
37268 so that the result of vpshufb can have the two V2TImode halves
37269 swapped. */
37270 m128 = GEN_INT (-128);
37271 for (i = 0; i < 32; ++i)
37273 rperm[0][i] = m128;
37274 rperm[1][i] = m128;
37275 rperm[2][i] = m128;
37276 rperm[3][i] = m128;
37282 for (i = 0; i < nelt; ++i)
37284 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
37285 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
37286 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
37288 for (j = 0; j < eltsz; ++j)
37289 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
37290 used[which] = true;
37293 for (i = 0; i < 2; ++i)
37295 if (!used[2 * i + 1])
37300 vperm = gen_rtx_CONST_VECTOR (V32QImode,
37301 gen_rtvec_v (32, rperm[2 * i + 1]));
37302 vperm = force_reg (V32QImode, vperm);
37303 h[i] = gen_reg_rtx (V32QImode);
37304 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
37305 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
37308 /* Swap the 128-bit lanes of h[X]. */
37309 for (i = 0; i < 2; ++i)
37311 if (h[i] == NULL_RTX)
37313 op = gen_reg_rtx (V4DImode);
37314 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
37315 const2_rtx, GEN_INT (3), const0_rtx,
37317 h[i] = gen_lowpart (V32QImode, op);
37320 for (i = 0; i < 2; ++i)
37327 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
37328 vperm = force_reg (V32QImode, vperm);
37329 l[i] = gen_reg_rtx (V32QImode);
37330 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
37331 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
37334 for (i = 0; i < 2; ++i)
37338 op = gen_reg_rtx (V32QImode);
37339 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
37346 gcc_assert (l[0] && l[1]);
37347 op = gen_lowpart (V32QImode, d->target);
37348 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
37352 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
37353 With all of the interface bits taken care of, perform the expansion
37354 in D and return true on success. */
37357 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
37359 /* Try a single instruction expansion. */
37360 if (expand_vec_perm_1 (d))
37363 /* Try sequences of two instructions. */
37365 if (expand_vec_perm_pshuflw_pshufhw (d))
37368 if (expand_vec_perm_palignr (d))
37371 if (expand_vec_perm_interleave2 (d))
37374 if (expand_vec_perm_broadcast (d))
37377 if (expand_vec_perm_vpermq_perm_1 (d))
37380 /* Try sequences of three instructions. */
37382 if (expand_vec_perm_pshufb2 (d))
37385 if (expand_vec_perm_interleave3 (d))
37388 /* Try sequences of four instructions. */
37390 if (expand_vec_perm_vpshufb2_vpermq (d))
37393 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
37396 /* ??? Look for narrow permutations whose element orderings would
37397 allow the promotion to a wider mode. */
37399 /* ??? Look for sequences of interleave or a wider permute that place
37400 the data into the correct lanes for a half-vector shuffle like
37401 pshuf[lh]w or vpermilps. */
37403 /* ??? Look for sequences of interleave that produce the desired results.
37404 The combinatorics of punpck[lh] get pretty ugly... */
37406 if (expand_vec_perm_even_odd (d))
37409 /* Even longer sequences. */
37410 if (expand_vec_perm_vpshufb4_vpermq2 (d))
37417 ix86_expand_vec_perm_const (rtx operands[4])
37419 struct expand_vec_perm_d d;
37420 unsigned char perm[MAX_VECT_LEN];
37421 int i, nelt, which;
37424 d.target = operands[0];
37425 d.op0 = operands[1];
37426 d.op1 = operands[2];
37429 d.vmode = GET_MODE (d.target);
37430 gcc_assert (VECTOR_MODE_P (d.vmode));
37431 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37432 d.testing_p = false;
37434 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
37435 gcc_assert (XVECLEN (sel, 0) == nelt);
37436 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
37438 for (i = which = 0; i < nelt; ++i)
37440 rtx e = XVECEXP (sel, 0, i);
37441 int ei = INTVAL (e) & (2 * nelt - 1);
37443 which |= (ei < nelt ? 1 : 2);
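/* WHICH accumulates 1 if any element comes from operand 0 and 2 if
any comes from operand 1; the code below uses it to fold masks that
only reference one operand. */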
37454 if (!rtx_equal_p (d.op0, d.op1))
37457 /* The elements of PERM do not suggest that only the first operand
37458 is used, but both operands are identical. Allow easier matching
37459 of the permutation by folding the permutation into the single
37460 input vector. */
37461 for (i = 0; i < nelt; ++i)
37462 if (d.perm[i] >= nelt)
37471 for (i = 0; i < nelt; ++i)
37477 if (ix86_expand_vec_perm_const_1 (&d))
37480 /* If the mask says both arguments are needed, but they are the same,
37481 the above tried to expand with d.op0 == d.op1. If that didn't work,
37482 retry with d.op0 != d.op1 as that is what testing has been done with. */
37483 if (which == 3 && d.op0 == d.op1)
37488 memcpy (d.perm, perm, sizeof (perm));
37489 d.op1 = gen_reg_rtx (d.vmode);
37491 ok = ix86_expand_vec_perm_const_1 (&d);
37492 seq = get_insns ();
37496 emit_move_insn (d.op1, d.op0);
37505 /* Implement targetm.vectorize.vec_perm_const_ok. */
37508 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
37509 const unsigned char *sel)
37511 struct expand_vec_perm_d d;
37512 unsigned int i, nelt, which;
37516 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37517 d.testing_p = true;
37519 /* Given sufficient ISA support we can just return true here
37520 for selected vector modes. */
37521 if (GET_MODE_SIZE (d.vmode) == 16)
37523 /* All implementable with a single vpperm insn. */
37526 /* All implementable with 2 pshufb + 1 ior. */
37529 /* All implementable with shufpd or unpck[lh]pd. */
37534 /* Extract the values from the vector CST into the permutation
37535 array in D. */
37536 memcpy (d.perm, sel, nelt);
37537 for (i = which = 0; i < nelt; ++i)
37539 unsigned char e = d.perm[i];
37540 gcc_assert (e < 2 * nelt);
37541 which |= (e < nelt ? 1 : 2);
37544 /* For all elements from the second vector, fold them onto the first. */
37546 for (i = 0; i < nelt; ++i)
37549 /* Check whether the mask can be applied to the vector type. */
37550 one_vec = (which != 3);
37552 /* Implementable with shufps or pshufd. */
37553 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
37556 /* Otherwise we have to go through the motions and see if we can
37557 figure out how to generate the requested permutation. */
37558 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
37559 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
37561 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
37564 ret = ix86_expand_vec_perm_const_1 (&d);
37571 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
37573 struct expand_vec_perm_d d;
37579 d.vmode = GET_MODE (targ);
37580 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37581 d.testing_p = false;
37583 for (i = 0; i < nelt; ++i)
37584 d.perm[i] = i * 2 + odd;
37586 /* We'll either be able to implement the permutation directly... */
37587 if (expand_vec_perm_1 (&d))
37590 /* ... or we use the special-case patterns. */
37591 expand_vec_perm_even_odd_1 (&d, odd);
37594 /* Expand an insert into a vector register through pinsr insn.
37595 Return true if successful. */
37598 ix86_expand_pinsr (rtx *operands)
37600 rtx dst = operands[0];
37601 rtx src = operands[3];
37603 unsigned int size = INTVAL (operands[1]);
37604 unsigned int pos = INTVAL (operands[2]);
37606 if (GET_CODE (dst) == SUBREG)
37608 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
37609 dst = SUBREG_REG (dst);
37612 if (GET_CODE (src) == SUBREG)
37613 src = SUBREG_REG (src);
37615 switch (GET_MODE (dst))
37622 enum machine_mode srcmode, dstmode;
37623 rtx (*pinsr)(rtx, rtx, rtx, rtx);
37625 srcmode = mode_for_size (size, MODE_INT, 0);
37630 if (!TARGET_SSE4_1)
37632 dstmode = V16QImode;
37633 pinsr = gen_sse4_1_pinsrb;
37639 dstmode = V8HImode;
37640 pinsr = gen_sse2_pinsrw;
37644 if (!TARGET_SSE4_1)
37646 dstmode = V4SImode;
37647 pinsr = gen_sse4_1_pinsrd;
37651 gcc_assert (TARGET_64BIT);
37652 if (!TARGET_SSE4_1)
37654 dstmode = V2DImode;
37655 pinsr = gen_sse4_1_pinsrq;
37662 dst = gen_lowpart (dstmode, dst);
37663 src = gen_lowpart (srcmode, src);
37667 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
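/* Note that the pinsr patterns are written with vec_merge, so the
immediate is the one-hot vec_merge mask 1 << POS (POS is an element
index rather than a bit offset by this point), not the raw element
index. */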
37676 /* This function returns the calling-ABI-specific va_list type node.
37677 It returns the FNDECL-specific va_list type. */
37680 ix86_fn_abi_va_list (tree fndecl)
37683 return va_list_type_node;
37684 gcc_assert (fndecl != NULL_TREE);
37686 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
37687 return ms_va_list_type_node;
37689 return sysv_va_list_type_node;
37692 /* Returns the canonical va_list type specified by TYPE. If there
37693 is no valid TYPE provided, it returns NULL_TREE. */
37696 ix86_canonical_va_list_type (tree type)
37700 /* Resolve references and pointers to va_list type. */
37701 if (TREE_CODE (type) == MEM_REF)
37702 type = TREE_TYPE (type);
37703 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
37704 type = TREE_TYPE (type);
37705 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
37706 type = TREE_TYPE (type);
37708 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
37710 wtype = va_list_type_node;
37711 gcc_assert (wtype != NULL_TREE);
37713 if (TREE_CODE (wtype) == ARRAY_TYPE)
37715 /* If va_list is an array type, the argument may have decayed
37716 to a pointer type, e.g. by being passed to another function.
37717 In that case, unwrap both types so that we can compare the
37718 underlying records. */
37719 if (TREE_CODE (htype) == ARRAY_TYPE
37720 || POINTER_TYPE_P (htype))
37722 wtype = TREE_TYPE (wtype);
37723 htype = TREE_TYPE (htype);
37726 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37727 return va_list_type_node;
37728 wtype = sysv_va_list_type_node;
37729 gcc_assert (wtype != NULL_TREE);
37731 if (TREE_CODE (wtype) == ARRAY_TYPE)
37733 /* If va_list is an array type, the argument may have decayed
37734 to a pointer type, e.g. by being passed to another function.
37735 In that case, unwrap both types so that we can compare the
37736 underlying records. */
37737 if (TREE_CODE (htype) == ARRAY_TYPE
37738 || POINTER_TYPE_P (htype))
37740 wtype = TREE_TYPE (wtype);
37741 htype = TREE_TYPE (htype);
37744 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37745 return sysv_va_list_type_node;
37746 wtype = ms_va_list_type_node;
37747 gcc_assert (wtype != NULL_TREE);
37749 if (TREE_CODE (wtype) == ARRAY_TYPE)
37751 /* If va_list is an array type, the argument may have decayed
37752 to a pointer type, e.g. by being passed to another function.
37753 In that case, unwrap both types so that we can compare the
37754 underlying records. */
37755 if (TREE_CODE (htype) == ARRAY_TYPE
37756 || POINTER_TYPE_P (htype))
37758 wtype = TREE_TYPE (wtype);
37759 htype = TREE_TYPE (htype);
37762 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37763 return ms_va_list_type_node;
37766 return std_canonical_va_list_type (type);
37769 /* Iterate through the target-specific builtin types for va_list.
37770 IDX denotes the iterator, *PTREE is set to the result type of
37771 the va_list builtin, and *PNAME to its internal type.
37772 Returns zero if there is no element for this index, otherwise
37773 IDX should be increased upon the next call.
37774 Note, do not iterate a base builtin's name like __builtin_va_list.
37775 Used from c_common_nodes_and_builtins. */
37778 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
37788 *ptree = ms_va_list_type_node;
37789 *pname = "__builtin_ms_va_list";
37793 *ptree = sysv_va_list_type_node;
37794 *pname = "__builtin_sysv_va_list";
37802 #undef TARGET_SCHED_DISPATCH
37803 #define TARGET_SCHED_DISPATCH has_dispatch
37804 #undef TARGET_SCHED_DISPATCH_DO
37805 #define TARGET_SCHED_DISPATCH_DO do_dispatch
37806 #undef TARGET_SCHED_REASSOCIATION_WIDTH
37807 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
37809 /* The size of the dispatch window is the total number of bytes of
37810 object code allowed in a window. */
37811 #define DISPATCH_WINDOW_SIZE 16
37813 /* Number of dispatch windows considered for scheduling. */
37814 #define MAX_DISPATCH_WINDOWS 3
37816 /* Maximum number of instructions in a window. */
37819 /* Maximum number of immediate operands in a window. */
37822 /* Maximum number of immediate bits allowed in a window. */
37823 #define MAX_IMM_SIZE 128
37825 /* Maximum number of 32 bit immediates allowed in a window. */
37826 #define MAX_IMM_32 4
37828 /* Maximum number of 64 bit immediates allowed in a window. */
37829 #define MAX_IMM_64 2
37831 /* Maximum total of loads or prefetches allowed in a window. */
37834 /* Maximum total of stores allowed in a window. */
37835 #define MAX_STORE 1
37841 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
37842 enum dispatch_group {
37857 /* Number of allowable groups in a dispatch window. It is an array
37858 indexed by the dispatch_group enum. 100 is used as a big number
37859 because the number of these kinds of operations does not have any
37860 effect in a dispatch window, but we need them for other reasons in
37861 the table. */
37862 static unsigned int num_allowable_groups[disp_last] = {
37863 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
37866 char group_name[disp_last + 1][16] = {
37867 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
37868 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
37869 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
37872 /* Instruction path. */
37875 path_single, /* Single micro op. */
37876 path_double, /* Double micro op. */
37877 path_multi, /* Instructions with more than 2 micro ops. */
37881 /* sched_insn_info defines a window to the instructions scheduled in
37882 the basic block. It contains a pointer to the insn_info table and
37883 the instruction scheduled.
37885 Windows are allocated for each basic block and are linked
37887 typedef struct sched_insn_info_s {
37889 enum dispatch_group group;
37890 enum insn_path path;
37895 /* Linked list of dispatch windows. This is a two way list of
37896 dispatch windows of a basic block. It contains information about
37897 the number of uops in the window and the total number of
37898 instructions and of bytes in the object code for this dispatch
37900 typedef struct dispatch_windows_s {
37901 int num_insn; /* Number of insn in the window. */
37902 int num_uops; /* Number of uops in the window. */
37903 int window_size; /* Number of bytes in the window. */
37904 int window_num; /* Window number, 0 or 1. */
37905 int num_imm; /* Number of immediates in an insn. */
37906 int num_imm_32; /* Number of 32 bit immediates in an insn. */
37907 int num_imm_64; /* Number of 64 bit immediates in an insn. */
37908 int imm_size; /* Total immediates in the window. */
37909 int num_loads; /* Total memory loads in the window. */
37910 int num_stores; /* Total memory stores in the window. */
37911 int violation; /* Violation exists in window. */
37912 sched_insn_info *window; /* Pointer to the window. */
37913 struct dispatch_windows_s *next;
37914 struct dispatch_windows_s *prev;
37915 } dispatch_windows;
37917 /* Immediate values used in an insn. */
37918 typedef struct imm_info_s
37925 static dispatch_windows *dispatch_window_list;
37926 static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}

/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
          || type == TYPE_ICMP
          || type == TYPE_FCMP
          || GET_CODE (PATTERN (insn)) == COMPARE);
}

/* Return true if a dispatch violation was encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}

/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}

/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}

/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}

/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}

/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}

/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
                  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}

/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not
   dispatch windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
        init_window (0);

      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;
  init_window (1);

  return dispatch_window_list1;
}

/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (!*in_rtx)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST: case SYMBOL_REF: case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;

    default:
      break;
    }

  return 0;
}

/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
                (rtx_function) find_constant_1, (void *) imm_values);
}

/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}

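/* Illustrative sketch (not part of GCC): the return value above is the
   immediate payload in bytes, 4 per 32-bit and 8 per 64-bit immediate.
   A hedged, standalone rendering of the same arithmetic:  */
#if 0
#include <assert.h>

int
main (void)
{
  /* An insn carrying one 32-bit and one 64-bit immediate would report
     4 + 8 == 12 bytes of immediate data.  */
  int imm32 = 1, imm64 = 1;

  assert (imm32 * 4 + imm64 * 8 == 12);
  return 0;
}
#endif
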
/* This function indicates if an operand of an instruction is an
   immediate.  */

static int
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return 0;
}

/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}

/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}

/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}

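/* Illustrative sketch (not part of GCC): once a window already holds
   MAX_LOAD loads or MAX_STORE stores, a further memory op of that
   group is "restricted" and reported as BIG, which the caller then
   treats as not fitting the window.  A hedged standalone miniature of
   the load/store budget check:  */
#if 0
#include <stdio.h>

#define MAX_LOAD 2
#define MAX_STORE 1
#define BIG 100

/* Hypothetical reduction of the memory-group branch above.  */
static int
restricted (int is_load, int num_loads, int num_stores)
{
  if ((is_load && num_loads >= MAX_LOAD)
      || (!is_load && num_stores >= MAX_STORE))
    return BIG;
  return 1;
}

int
main (void)
{
  printf ("%d\n", restricted (1, 2, 0)); /* 100: a third load won't fit */
  printf ("%d\n", restricted (0, 0, 0)); /* 1: the first store is fine */
  return 0;
}
#endif
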
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for next window.  */
        return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }
  return true;
}

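/* Illustrative sketch (not part of GCC): window 0 only accepts single
   and double path uops, so a double-path insn needs two free uop
   slots.  A hedged standalone check of that rule; the MAX_INSN value
   mirrors the macro defined earlier in this file:  */
#if 0
#include <stdio.h>

#define MAX_INSN 4

int
main (void)
{
  int num_uops = 3; /* uops already counted in window 0 */

  /* Mirrors "path == path_double && (num_uops + 2) > MAX_INSN".  */
  if (num_uops + 2 > MAX_INSN)
    printf ("double-path insn does not fit in window 0\n");
  return 0;
}
#endif
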
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}

/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed the allowable maximum, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
      return;
    }
}

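/* Illustrative sketch (not part of GCC): the two scheduling windows
   cover 16 + 16 == 32 bytes, and the pair is flushed once it is full
   or adding the next instruction would reach the 48 byte budget.  A
   hedged standalone rendering of the byte arithmetic used above:  */
#if 0
#include <stdio.h>

int
main (void)
{
  int window0_bytes = 16, window1_bytes = 16;
  int sum = window0_bytes + window1_bytes;
  int next_insn_bytes = 7; /* hypothetical instruction length */

  /* Mirrors "sum == 32 || (byte_len + sum) >= 48" above.  */
  if (sum == 32 || next_insn_bytes + sum >= 48)
    printf ("process_end_window: start over at window 0\n");
  return 0;
}
#endif
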
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *) list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}

/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}

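/* A hedged usage note: DEBUG_FUNCTION helpers such as the two above
   are meant to be called by hand from a debugger session rather than
   from compiler code.  The session below is hypothetical; the output
   shape follows the fprintf calls in debug_dispatch_window_file.

     (gdb) call debug_dispatch_window (0)
     Window #0:
       num_insn = 2, num_uops = 2, window_size = 9
       ...
*/
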
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}

/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}

/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}

/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}

/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
                          enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}

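/* Illustrative sketch (not part of GCC): a reassociation width of 2
   lets the reassoc pass balance a chain like a + b + c + d into
   (a + b) + (c + d), two independent additions that can issue in
   parallel, instead of the serial ((a + b) + c) + d.  A hedged
   standalone illustration of the two shapes:  */
#if 0
int
sum_serial (int a, int b, int c, int d)
{
  return ((a + b) + c) + d;   /* three dependent adds, width 1 */
}

int
sum_balanced (int a, int b, int c, int d)
{
  return (a + b) + (c + d);   /* two independent adds, width 2 */
}
#endif
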
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}

/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}

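/* Illustrative sketch (not part of GCC): the hook returns a bitmask of
   candidate vector sizes in bytes, so 32 | 16 == 48 encodes "try
   32-byte (256-bit) vectors, then 16-byte (128-bit) ones".  A hedged
   standalone decoding of that mask:  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned int sizes = 32 | 16;   /* value returned when AVX is on */
  unsigned int s;

  for (s = 32; s != 0; s >>= 1)
    if (sizes & s)
      printf ("try %u-byte vectors\n", s);
  return 0;
}
#endif
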
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"